# R Markdown
#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
##
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
##
## nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
##
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
##
## compare
## The following object is masked from 'package:class':
##
## knn
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8 2023-06-11
#install.packages('ggplot2')
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## The following object is masked from 'package:kernlab':
##
## alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::margin() masks randomForest::margin()
## ✖ purrr::none() masks locfit::none()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ purrr::simplify() masks igraph::simplify()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following objects are masked from 'package:rstanarm':
##
## compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
##
## Attaching package: 'TDA'
##
## The following object is masked from 'package:cluster':
##
## silhouette
library(TDAstats)
library(ks)
##
## Attaching package: 'ks'
##
## The following object is masked from 'package:TDA':
##
## kde
##
## The following object is masked from 'package:MCMCpack':
##
## vech
##
## The following object is masked from 'package:igraph':
##
## compare
##
## The following object is masked from 'package:BayesFactor':
##
## compare
#install.packages('MLmetrics')
library(MLmetrics)
##
## Attaching package: 'MLmetrics'
##
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
##
## The following object is masked from 'package:base':
##
## Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Import the UCI Adult dataset from a local copy stored on the author's desktop.
# The file has no header row, so read.csv() names the columns V1..V15.
# NOTE(review): the character fields keep a leading space (e.g. " State-gov");
# consider `strip.white = TRUE`, but verify the downstream one-hot column
# names/positions first, because later code selects columns by position.
adult <- read.csv(
  "~/Desktop/NCU/DissertationDatasets/Adult/adult.data",
  header = FALSE
)
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# head() only appends a stray "NULL" to the output.
str(adult)
## 'data.frame': 32561 obs. of 15 variables:
## $ V1 : int 39 50 38 53 28 37 49 52 31 42 ...
## $ V2 : chr " State-gov" " Self-emp-not-inc" " Private" " Private" ...
## $ V3 : int 77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
## $ V4 : chr " Bachelors" " Bachelors" " HS-grad" " 11th" ...
## $ V5 : int 13 13 9 7 13 14 5 9 14 13 ...
## $ V6 : chr " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
## $ V7 : chr " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
## $ V8 : chr " Not-in-family" " Husband" " Not-in-family" " Husband" ...
## $ V9 : chr " White" " White" " White" " Black" ...
## $ V10: chr " Male" " Male" " Male" " Male" ...
## $ V11: int 2174 0 0 0 0 0 0 0 14084 5178 ...
## $ V12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V13: int 40 13 40 40 40 40 16 45 50 40 ...
## $ V14: chr " United-States" " United-States" " United-States" " United-States" ...
## $ V15: chr " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
# Per-column summary of the raw data frame: quantiles and mean for the integer
# columns, length/class/mode for the character columns.
summary(adult)
## V1 V2 V3 V4
## Min. :17.00 Length:32561 Min. : 12285 Length:32561
## 1st Qu.:28.00 Class :character 1st Qu.: 117827 Class :character
## Median :37.00 Mode :character Median : 178356 Mode :character
## Mean :38.58 Mean : 189778
## 3rd Qu.:48.00 3rd Qu.: 237051
## Max. :90.00 Max. :1484705
## V5 V6 V7 V8
## Min. : 1.00 Length:32561 Length:32561 Length:32561
## 1st Qu.: 9.00 Class :character Class :character Class :character
## Median :10.00 Mode :character Mode :character Mode :character
## Mean :10.08
## 3rd Qu.:12.00
## Max. :16.00
## V9 V10 V11 V12
## Length:32561 Length:32561 Min. : 0 Min. : 0.0
## Class :character Class :character 1st Qu.: 0 1st Qu.: 0.0
## Mode :character Mode :character Median : 0 Median : 0.0
## Mean : 1078 Mean : 87.3
## 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :99999 Max. :4356.0
## V13 V14 V15
## Min. : 1.00 Length:32561 Length:32561
## 1st Qu.:40.00 Class :character Class :character
## Median :40.00 Mode :character Mode :character
## Mean :40.44
## 3rd Qu.:45.00
## Max. :99.00
# Pairwise plot matrix (GGally::ggpairs) of the integer columns V1, V3, V5,
# V11, V12, V13 together with the character column V15 (values such as " <=50K").
ggpairs(adult[,c(1,3,5,11,12,13,15)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions
# Bayesian sign test (counting form).
#
# Arguments:
#   diffVector  numeric vector of differences between two classifiers.
#   rope_min    lower bound of the region of practical equivalence (rope).
#   rope_max    upper bound of the rope; must not be smaller than rope_min.
#
# Returns a list with elements probLeft, probRope and probRight: the fraction
# of observations (after prepending one pseudo-observation at 0) that lie
# below the rope, inside it, and above it. The rope bounds are inclusive, so
# the three fractions always sum to 1.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Add the fake first observation in 0. It is counted with the same weight as
  # every real observation.
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)
  # Values exactly on a bound belong to the rope; with strict inequalities they
  # would be counted in no region at all.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)

  list(
    "probLeft" = probLeft,
    "probRope" = probRope,
    "probRight" = probRight
  )
}
# Bayesian signed-rank test.
#
# Arguments:
#   diffVector  numeric vector of differences between two classifiers
#               (no missing values).
#   rope_min    lower bound of the region of practical equivalence (rope).
#   rope_max    upper bound of the rope; must not be smaller than rope_min.
#   samples     number of Dirichlet weight vectors to draw (default 30000).
#
# For every sampled weight vector w, the products w[i] * w[j] over all ordered
# pairs (i, j) are accumulated into one of three regions:
#   right  if diffVector[i] + diffVector[j] >  2 * rope_max
#   rope   else if the pair sum              >  2 * rope_min
#   left   otherwise
# The region with the largest accumulated mass wins that sample; ties share
# the win equally.
#
# Returns a list with elements winLeft, winRope and winRight: the average
# share of samples won by each region.
BayesianSignedRank <- function(diffVector, rope_min, rope_max, samples = 30000) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  if (anyNA(diffVector)) {
    stop("diffVector must not contain missing values")
  }

  # Build the vector 0.5 1 1 ... 1: weight 0.5 for the pseudo-observation and
  # weight 1 for each observed difference.
  weights <- c(0.5, rep(1, length(diffVector)))
  # Add the fake first observation in 0.
  diffVector <- c(0, diffVector)

  # One row of Dirichlet-distributed weights per sample.
  sampledWeights <- MCMCpack::rdirichlet(samples, weights)

  # The region of each pair depends only on the data, not on the sampled
  # weights, so classify all pairs once up front.
  pairSums <- outer(diffVector, diffVector, "+")
  isRight <- pairSums > 2 * rope_max
  isRope <- !isRight & pairSums > 2 * rope_min
  isLeft <- !isRight & !isRope

  # For a weight row w, the mass of a region with 0/1 mask M is
  # sum_ij w[i] * w[j] * M[i, j]; computed here for all rows at once.
  regionMass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  massRight <- regionMass(isRight)
  massRope <- regionMass(isRope)
  massLeft <- regionMass(isLeft)

  # Per sample: which region(s) reach the maximum, and how many share it.
  maxWins <- pmax(massRight, massRope, massLeft)
  rightWon <- massRight == maxWins
  ropeWon <- massRope == maxWins
  leftWon <- massLeft == maxWins
  winners <- rightWon + ropeWon + leftWon

  list(
    "winLeft" = mean(leftWon / winners),
    "winRope" = mean(ropeWon / winners),
    "winRight" = mean(rightWon / winners)
  )
}
# Bayesian correlated t-test.
#
# Arguments:
#   diff_a_b  vector of differences between the two classifiers, one per fold
#             of cross-validation. With 10 runs of 10-fold cross-validation
#             there are 100 results per classifier; both classifiers should be
#             evaluated on the same folds, and diff_a_b is the fold-by-fold
#             difference.
#   rho       correlation of the cross-validation results:
#             1 / (number of folds).
#   rope_min  lower bound of the rope.
#   rope_max  upper bound of the rope.
#
# Returns a list with elements left, rope and right: the probability mass of a
# Student t distribution (location mean(diff_a_b), scale
# sd(diff_a_b) * sqrt(1/n + rho/(1 - rho)), n - 1 degrees of freedom) below
# the rope, inside it, and above it.
#
# Raises an error when rope_max < rope_min.
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  delta <- mean(diff_a_b)
  n <- length(diff_a_b)
  df <- n - 1
  stdX <- sd(diff_a_b)

  # All differences identical: the scale is 0 and the distribution collapses
  # to a point at delta. Dividing by 0 would give NaN whenever delta sits
  # exactly on a rope bound, so assign the whole mass to one region directly.
  if (isTRUE(stdX == 0)) {
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    return(list("left" = p.left, "rope" = 1 - p.left - p.right, "right" = p.right))
  }

  sp <- stdX * sqrt(1 / n + rho / (1 - rho))
  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left

  list("left" = p.left, "rope" = p.rope, "right" = 1 - p.left - p.rope)
}
# Fix the random seed so the sampling further down is reproducible.
set.seed(16974)
### Prepare datasets for one-hot encoding if necessary and persistent homology of each dataset.
## One-hot encoding for the adult dataset
library(caret)
# Define the one-hot encoder. The formula " ~ ." covers every column of
# `adult`, so the V15 column is expanded into indicator columns as well.
dummy.adult <- dummyVars(" ~ .", data = adult)
# Apply the encoder and store the result as a data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata = adult))
# Show the structure of the encoded data frame. str() prints it and returns
# NULL invisibly, so it is called directly rather than wrapped in head().
str(adult.one_hot_df)
## 'data.frame': 32561 obs. of 110 variables:
## $ V1 : num 39 50 38 53 28 37 49 52 31 42 ...
## $ V2.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 0 0 1 1 1 1 1 0 1 1 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 1 0 0 0 0 0 1 0 0 ...
## $ V2.State.gov : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 77516 83311 215646 234721 338409 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 1 0 0 0 0 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 1 1 0 0 1 0 0 0 0 1 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 1 0 0 0 0 1 0 0 ...
## $ V4.Masters : num 0 0 0 0 0 1 0 0 1 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 13 13 9 7 13 14 5 9 14 13 ...
## $ V6.Divorced : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 0 1 0 1 1 1 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V6.Never.married : num 1 0 0 0 0 0 0 0 1 0 ...
## $ V6.Separated : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Adm.clerical : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 1 0 0 0 1 0 1 0 1 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 0 1 0 1 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 1 0 1 0 0 0 1 0 1 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Unmarried : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 1 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 1 1 0 1 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 0 0 1 0 1 1 1 ...
## $ V10.Female : num 0 0 0 0 1 1 1 0 1 0 ...
## $ V10.Male : num 1 1 1 1 0 0 0 1 0 1 ...
## $ V11 : num 2174 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 13 40 40 40 40 16 45 50 40 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Keep the original V15 column and append it to the encoded data frame.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Positions 1, 11, 28, 64, 65, 66 are V1, V3, V5, V11, V12 and V13 in the
# encoded data frame.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66)]
## Persistent homology of adult dataset
# Draw a random sample of 1000 rows (without replacement) from the encoded
# data to see whether a barcode and persistence diagram can be resolved at
# that size.
adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
# str() prints the structure and returns NULL invisibly; no head() needed.
str(adult.one_hot_1000_df)
## 'data.frame': 1000 obs. of 110 variables:
## $ V1 : num 33 25 39 21 32 26 20 58 24 63 ...
## $ V2.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 1 1 1 1 1 1 0 0 1 0 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 0 0 0 0 0 0 0 0 1 ...
## $ V2.State.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 176992 105693 234901 198050 134886 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 0 1 0 0 0 1 0 0 0 0 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V4.Masters : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 1 0 0 1 ...
## $ V5 : num 14 13 12 12 9 13 10 7 9 10 ...
## $ V6.Divorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 1 0 0 0 1 0 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Never.married : num 0 1 0 1 0 1 1 0 1 0 ...
## $ V6.Separated : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V7.Adm.clerical : num 0 0 1 1 1 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 0 0 1 1 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 1 1 0 0 0 1 0 0 0 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 1 0 0 0 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 0 1 0 1 0 1 0 0 0 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 1 0 ...
## $ V8.Unmarried : num 0 0 1 0 0 0 1 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10.Female : num 0 1 0 1 1 1 1 0 1 0 ...
## $ V10.Male : num 1 0 1 0 0 0 0 1 0 1 ...
## $ V11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 40 40 25 40 40 20 16 25 48 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Per-column summary statistics of the 1000-row sample of the encoded data.
summary(adult.one_hot_1000_df)
## V1 V2.. V2.Federal.gov V2.Local.gov
## Min. :17.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:28.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :37.00 Median :0.000 Median :0.000 Median :0.000
## Mean :38.64 Mean :0.077 Mean :0.025 Mean :0.064
## 3rd Qu.:47.00 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :90.00 Max. :1.000 Max. :1.000 Max. :1.000
## V2.Never.worked V2.Private V2.Self.emp.inc V2.Self.emp.not.inc
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000 Median :0.000
## Mean :0 Mean :0.679 Mean :0.037 Mean :0.079
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V2.State.gov V2.Without.pay V3 V4.10th
## Min. :0.000 Min. :0 Min. : 19302 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:123797 1st Qu.:0.000
## Median :0.000 Median :0 Median :181982 Median :0.000
## Mean :0.039 Mean :0 Mean :195583 Mean :0.041
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:242529 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :721161 Max. :1.000
## V4.11th V4.12th V4.1st.4th V4.5th.6th
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.032 Mean :0.015 Mean :0.005 Mean :0.015
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.7th.8th V4.9th V4.Assoc.acdm V4.Assoc.voc V4.Bachelors
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000 Median :0.000
## Mean :0.015 Mean :0.018 Mean :0.04 Mean :0.052 Mean :0.155
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000 Max. :1.000
## V4.Doctorate V4.HS.grad V4.Masters V4.Preschool
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.327 Mean :0.053 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.Prof.school V4.Some.college V5 V6.Divorced
## Min. :0.000 Min. :0.000 Min. : 1 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 9 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :10 Median :0.000
## Mean :0.014 Mean :0.202 Mean :10 Mean :0.132
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:12 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :16 Max. :1.000
## V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0.464 Mean :0.005
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
## V6.Never.married V6.Separated V6.Widowed V7..
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.325 Mean :0.041 Mean :0.033 Mean :0.077
## 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.117 Mean :0.001 Mean :0.129 Mean :0.124
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.029 Mean :0.041 Mean :0.071 Mean :0.091
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv V7.Sales
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.119 Mean :0.018 Mean :0.102
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Tech.support V7.Transport.moving V8.Husband V8.Not.in.family
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000
## Mean :0.035 Mean :0.043 Mean :0.41 Mean :0.261
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:1.00 3rd Qu.:1.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000
## V8.Other.relative V8.Own.child V8.Unmarried V8.Wife
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.027 Mean :0.136 Mean :0.115 Mean :0.051
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander V9.Black V9.Other
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.029 Mean :0.104 Mean :0.007
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.White V10.Female V10.Male V11
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 0.0
## Median :1.000 Median :0.000 Median :1.000 Median : 0.0
## Mean :0.846 Mean :0.339 Mean :0.661 Mean : 868.9
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.: 0.0
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :99999.0
## V12 V13 V14.. V14.Cambodia V14.Canada
## Min. : 0.00 Min. : 1.0 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.: 0.00 1st Qu.:40.0 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median : 0.00 Median :40.0 Median :0.000 Median :0 Median :0.000
## Mean : 92.56 Mean :40.5 Mean :0.024 Mean :0 Mean :0.003
## 3rd Qu.: 0.00 3rd Qu.:45.0 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :2457.00 Max. :99.0 Max. :1.000 Max. :0 Max. :1.000
## V14.China V14.Columbia V14.Cuba V14.Dominican.Republic
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.002 Mean :0.005 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Ecuador V14.El.Salvador V14.England V14.France
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.003 Mean :0.003 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Germany V14.Greece V14.Guatemala V14.Haiti
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000
## Mean :0.002 Mean :0.002 Mean :0 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000
## V14.Holand.Netherlands V14.Honduras V14.Hong V14.Hungary
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.001 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.India V14.Iran V14.Ireland V14.Italy V14.Jamaica
## Min. :0.000 Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0.004 Mean :0 Mean :0 Mean :0.003 Mean :0.003
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Japan V14.Laos V14.Mexico V14.Nicaragua
## Min. :0.000 Min. :0 Min. :0.000 Min. :0
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0
## Median :0.000 Median :0 Median :0.000 Median :0
## Mean :0.003 Mean :0 Mean :0.022 Mean :0
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0
## Max. :1.000 Max. :0 Max. :1.000 Max. :0
## V14.Outlying.US.Guam.USVI.etc. V14.Peru V14.Philippines V14.Poland
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.004 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Portugal V14.Puerto.Rico V14.Scotland V14.South V14.Taiwan
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.004 Mean :0 Mean :0.001 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000 Max. :1.000
## V14.Thailand V14.Trinadad.Tobago V14.United.States V14.Vietnam
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :1.000 Median :0.000
## Mean :0 Mean :0.002 Mean :0.891 Mean :0.003
## 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Yugoslavia V15...50K V15..50K
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000
## Mean :0 Mean :0.769 Mean :0.231
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
# Calculate persistent homology for the 1000-row sample adult.one_hot_1000_df.
# NOTE(review): the input holds all encoded columns on their original scales
# (V3 reaches the hundreds of thousands while the indicator columns are 0/1),
# and it still contains the V15 indicator columns. Presumably distances are
# dominated by V3; confirm whether scaling or dropping columns is intended.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)
# Plot the barcode of the computed homology.
plot_barcode(phom.adult.one_hot_1000_df)

# Plot the persistence diagram of the computed homology.
plot_persist(phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————
# Prepare Adult dataset for Mapper 1D algorithm
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Column subsets selected by position in the encoded data frame:
#   df2: V1, V3, V5, V11, V12, V13 plus column 109 (a V15 indicator column)
#   df3: V1, V3, V5, columns 62-63 (the V10 indicator columns), V11, V12, V13
#   df4: everything except columns 109 and 110 (the two V15 indicator columns)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]
## Two filter functions: PCA & KDE
# Linear PCA as a filter function, centering and scaling all columns of the
# encoded data first. The argument is spelled `scale.` in full instead of
# relying on partial matching of `scale`.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))
# Kernel density estimate as a filter function, evaluated at the data points
# themselves with an identity bandwidth matrix. Both TDA and ks export kde();
# the namespace is given explicitly so the call does not depend on the order
# in which the packages were attached.
# NOTE(review): columns 1:4 of adult.one_hot_df3 are V1, V3, V5 and the first
# V10 indicator column; confirm this is the intended "4 of 6" set.
filter.kde <- ks::kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate
## *** Adult PCA Mapper: 5 intervals, 60% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the first principal component
# (ts_pca_b$PC1) as the filter.
# NOTE(review): dist(adult.one_hot_df) is recomputed by every Mapper run in
# this script; computing it once and reusing it would avoid repeated work.
m_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_adult_5.60.5 <- graph_from_adjacency_matrix(m_adult_5.60.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_adult_5.60.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.60.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.60.5$points_in_vertex)
## List of 5
## $ : int [1:6560] 2 8 10 11 12 15 21 26 28 39 ...
## $ : int [1:13933] 2 8 10 11 12 15 19 20 21 23 ...
## $ : int [1:15744] 1 2 3 4 5 6 9 11 15 16 ...
## $ : int [1:19829] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:16508] 1 3 5 7 13 14 17 18 22 25 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.60.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30. The graph built above is
# reused rather than rebuilt.
vertex_size <- lengths(m_adult_5.60.5$points_in_vertex)
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_adult_5.60.5.n1 <- m_adult_5.60.5$points_in_vertex[1]
m_adult_5.60.5.n1.vec <- as.vector(unlist(m_adult_5.60.5.n1))
m_adult_5.60.5.n2 <- m_adult_5.60.5$points_in_vertex[2]
m_adult_5.60.5.n2.vec <- as.vector(unlist(m_adult_5.60.5.n2))
m_adult_5.60.5.n3 <- m_adult_5.60.5$points_in_vertex[3]
m_adult_5.60.5.n3.vec <- as.vector(unlist(m_adult_5.60.5.n3))
m_adult_5.60.5.n4 <- m_adult_5.60.5$points_in_vertex[4]
m_adult_5.60.5.n4.vec <- as.vector(unlist(m_adult_5.60.5.n4))
m_adult_5.60.5.n5 <- m_adult_5.60.5$points_in_vertex[5]
m_adult_5.60.5.n5.vec <- as.vector(unlist(m_adult_5.60.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_adult_5.60.5.n1.vec, ]
tda.m_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_adult_5.60.5.n2.vec, ]
tda.m_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_adult_5.60.5.n3.vec, ]
tda.m_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_adult_5.60.5.n4.vec, ]
tda.m_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_adult_5.60.5.n5.vec, ]
## *** Adult PCA Mapper: 5 intervals, 50% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the first principal component
# (ts_pca_b$PC1) as the filter.
m_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_adult_5.50.5 <- graph_from_adjacency_matrix(m_adult_5.50.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_adult_5.50.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.50.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.50.5$points_in_vertex)
## List of 5
## $ : int [1:4917] 8 10 12 21 26 46 64 69 73 87 ...
## $ : int [1:12206] 2 8 10 11 12 15 21 24 26 28 ...
## $ : int [1:13240] 1 2 4 5 6 9 11 15 16 19 ...
## $ : int [1:16700] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:14404] 3 7 13 14 17 18 22 25 27 32 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.50.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30. The graph built above is
# reused rather than rebuilt.
vertex_size <- lengths(m_adult_5.50.5$points_in_vertex)
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_adult_5.50.5.n1 <- m_adult_5.50.5$points_in_vertex[1]
m_adult_5.50.5.n1.vec <- as.vector(unlist(m_adult_5.50.5.n1))
m_adult_5.50.5.n2 <- m_adult_5.50.5$points_in_vertex[2]
m_adult_5.50.5.n2.vec <- as.vector(unlist(m_adult_5.50.5.n2))
m_adult_5.50.5.n3 <- m_adult_5.50.5$points_in_vertex[3]
m_adult_5.50.5.n3.vec <- as.vector(unlist(m_adult_5.50.5.n3))
m_adult_5.50.5.n4 <- m_adult_5.50.5$points_in_vertex[4]
m_adult_5.50.5.n4.vec <- as.vector(unlist(m_adult_5.50.5.n4))
m_adult_5.50.5.n5 <- m_adult_5.50.5$points_in_vertex[5]
m_adult_5.50.5.n5.vec <- as.vector(unlist(m_adult_5.50.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_adult_5.50.5.n1.vec, ]
tda.m_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_adult_5.50.5.n2.vec, ]
tda.m_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_adult_5.50.5.n3.vec, ]
tda.m_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_adult_5.50.5.n4.vec, ]
tda.m_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_adult_5.50.5.n5.vec, ]
## *** Adult PCA Mapper: 5 intervals, 40% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the first principal component
# (ts_pca_b$PC1) as the filter.
m_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
## $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
## $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
## $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30.
vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)
# Draw the 40%-overlap graph itself, so that the graph, layout, sizes and
# colours all come from the same Mapper run.
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_adult_5.40.5.n1 <- m_adult_5.40.5$points_in_vertex[1]
m_adult_5.40.5.n1.vec <- as.vector(unlist(m_adult_5.40.5.n1))
m_adult_5.40.5.n2 <- m_adult_5.40.5$points_in_vertex[2]
m_adult_5.40.5.n2.vec <- as.vector(unlist(m_adult_5.40.5.n2))
m_adult_5.40.5.n3 <- m_adult_5.40.5$points_in_vertex[3]
m_adult_5.40.5.n3.vec <- as.vector(unlist(m_adult_5.40.5.n3))
m_adult_5.40.5.n4 <- m_adult_5.40.5$points_in_vertex[4]
m_adult_5.40.5.n4.vec <- as.vector(unlist(m_adult_5.40.5.n4))
m_adult_5.40.5.n5 <- m_adult_5.40.5$points_in_vertex[5]
m_adult_5.40.5.n5.vec <- as.vector(unlist(m_adult_5.40.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_adult_5.40.5.n1.vec, ]
tda.m_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_adult_5.40.5.n2.vec, ]
tda.m_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_adult_5.40.5.n3.vec, ]
tda.m_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_adult_5.40.5.n4.vec, ]
tda.m_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_adult_5.40.5.n5.vec, ]
## *** Adult KDE Mapper: 5 intervals, 60% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(m_kde_adult_5.60.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_kde_adult_5.60.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.60.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.60.5$points_in_vertex)
## List of 5
## $ : int [1:15260] 2 4 5 6 7 9 13 16 19 20 ...
## $ : int [1:14482] 1 2 4 6 8 9 12 13 20 22 ...
## $ : int [1:13266] 1 2 8 10 11 12 13 14 27 28 ...
## $ : int [1:11795] 3 8 10 11 12 14 15 17 27 28 ...
## $ : int [1:8940] 3 15 17 18 27 32 37 39 49 55 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.60.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30. The 60%-overlap graph built
# above is reused; no other graph variable is assigned in this section.
vertex_size <- lengths(m_kde_adult_5.60.5$points_in_vertex)
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_kde_adult_5.60.5.n1 <- m_kde_adult_5.60.5$points_in_vertex[1]
m_kde_adult_5.60.5.n1.vec <- as.vector(unlist(m_kde_adult_5.60.5.n1))
m_kde_adult_5.60.5.n2 <- m_kde_adult_5.60.5$points_in_vertex[2]
m_kde_adult_5.60.5.n2.vec <- as.vector(unlist(m_kde_adult_5.60.5.n2))
m_kde_adult_5.60.5.n3 <- m_kde_adult_5.60.5$points_in_vertex[3]
m_kde_adult_5.60.5.n3.vec <- as.vector(unlist(m_kde_adult_5.60.5.n3))
m_kde_adult_5.60.5.n4 <- m_kde_adult_5.60.5$points_in_vertex[4]
m_kde_adult_5.60.5.n4.vec <- as.vector(unlist(m_kde_adult_5.60.5.n4))
m_kde_adult_5.60.5.n5 <- m_kde_adult_5.60.5$points_in_vertex[5]
m_kde_adult_5.60.5.n5.vec <- as.vector(unlist(m_kde_adult_5.60.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_kde_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n1.vec, ]
tda.m_kde_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n2.vec, ]
tda.m_kde_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n3.vec, ]
tda.m_kde_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n4.vec, ]
tda.m_kde_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n5.vec, ]
## *** Adult KDE Mapper: 5 intervals, 50% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.50.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 50,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(m_kde_adult_5.50.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_kde_adult_5.50.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.50.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.50.5$points_in_vertex)
## List of 5
## $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
## $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
## $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
## $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.50.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30. The graph built above is
# reused rather than rebuilt.
vertex_size <- lengths(m_kde_adult_5.50.5$points_in_vertex)
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_kde_adult_5.50.5.n1 <- m_kde_adult_5.50.5$points_in_vertex[1]
m_kde_adult_5.50.5.n1.vec <- as.vector(unlist(m_kde_adult_5.50.5.n1))
m_kde_adult_5.50.5.n2 <- m_kde_adult_5.50.5$points_in_vertex[2]
m_kde_adult_5.50.5.n2.vec <- as.vector(unlist(m_kde_adult_5.50.5.n2))
m_kde_adult_5.50.5.n3 <- m_kde_adult_5.50.5$points_in_vertex[3]
m_kde_adult_5.50.5.n3.vec <- as.vector(unlist(m_kde_adult_5.50.5.n3))
m_kde_adult_5.50.5.n4 <- m_kde_adult_5.50.5$points_in_vertex[4]
m_kde_adult_5.50.5.n4.vec <- as.vector(unlist(m_kde_adult_5.50.5.n4))
m_kde_adult_5.50.5.n5 <- m_kde_adult_5.50.5$points_in_vertex[5]
m_kde_adult_5.50.5.n5.vec <- as.vector(unlist(m_kde_adult_5.50.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_kde_adult_5.50.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n1.vec, ]
tda.m_kde_adult_5.50.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n2.vec, ]
tda.m_kde_adult_5.50.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n3.vec, ]
tda.m_kde_adult_5.50.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n4.vec, ]
tda.m_kde_adult_5.50.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.50.5.n5.vec, ]
## *** Adult KDE Mapper: 5 intervals, 40% overlap, 5 bins
# 1-D Mapper on the one-hot Adult data with the kernel density estimate
# (filter.kde) as the filter.
m_kde_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5
)
# Undirected graph of the Mapper vertices. graph_from_adjacency_matrix() and
# layout_nicely() are the current names for the deprecated
# graph.adjacency() and layout.auto() (igraph >= 2.0.0).
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

# Inspect the Mapper output; str() prints the structure itself.
str(m_kde_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
## $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
## $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
## $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
# Colour each vertex by its level divided by the maximum level, on a
# red -> green -> light-blue ramp with my_resolution steps.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# Vertex size follows the log of the number of points in the vertex,
# normalised so the largest vertex has size 30. The graph built above is
# reused rather than rebuilt.
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
     vertex.color = my_colors)

## Extract the observation IDs of each Mapper vertex
# .nK keeps the one-element list; .nK.vec is the plain integer vector.
m_kde_adult_5.40.5.n1 <- m_kde_adult_5.40.5$points_in_vertex[1]
m_kde_adult_5.40.5.n1.vec <- as.vector(unlist(m_kde_adult_5.40.5.n1))
m_kde_adult_5.40.5.n2 <- m_kde_adult_5.40.5$points_in_vertex[2]
m_kde_adult_5.40.5.n2.vec <- as.vector(unlist(m_kde_adult_5.40.5.n2))
m_kde_adult_5.40.5.n3 <- m_kde_adult_5.40.5$points_in_vertex[3]
m_kde_adult_5.40.5.n3.vec <- as.vector(unlist(m_kde_adult_5.40.5.n3))
m_kde_adult_5.40.5.n4 <- m_kde_adult_5.40.5$points_in_vertex[4]
m_kde_adult_5.40.5.n4.vec <- as.vector(unlist(m_kde_adult_5.40.5.n4))
m_kde_adult_5.40.5.n5 <- m_kde_adult_5.40.5$points_in_vertex[5]
m_kde_adult_5.40.5.n5.vec <- as.vector(unlist(m_kde_adult_5.40.5.n5))
## Map each vertex's observation IDs to the rows of adult.one_hot_df4
tda.m_kde_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec, ]
tda.m_kde_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec, ]
tda.m_kde_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec, ]
tda.m_kde_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec, ]
tda.m_kde_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec, ]
library(caret)
# Rebuild the modelling frames: adult_df1 is column 15 of `adult`, appended
# by cbind() as the last column of the one-hot frame.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64:66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]
# Training-row indices: 70% of the rows (p = 0.7), partitioned on adult_df1
# and returned as a one-column matrix (list = FALSE).
# NOTE(review): no set.seed() is visible before this call, so the split may
# differ between runs unless a seed is set earlier in the file.
trainIndex <- createDataPartition(
  y = adult.one_hot_df4$adult_df1,
  p = 0.7,
  list = FALSE,
  times = 1
)
head(trainIndex)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 8
# Rows listed in trainIndex form the training set; the rest form the test set.
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex, ]
# Resampling scheme used as the basis for the models: 3-fold cross-validation.
fitControl <- trainControl(method = "cv", number = 3)
# Non-TDA-assisted baseline
# Candidate mtry values: 50, 100, ..., 1000 capped at the number of
# predictors (every column except adult_df1). randomForest resets any larger
# mtry to the valid range with a warning, so uncapped candidates above the
# predictor count would all refit the same model.
rfGrid <- expand.grid(
  mtry = unique(pmin((1:20) * 50, ncol(adult.one_hot_df4Train) - 1))
)
# Random forest tuned by cross-validation (fitControl), selected on accuracy.
# `importance = TRUE` is forwarded by train() to randomForest(); the argument
# name is lower-case, and a capitalised `Importance` matches no randomForest
# argument.
adultRfFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
                    importance = TRUE,
                    method = "rf",
                    trControl = fitControl,
                    tuneGrid = rfGrid, preProc = c("center", "scale"),
                    metric = "Accuracy")
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
# Show the resampling/tuning summary of the baseline (non-TDA) random forest.
print(adultRfFit)
## Random Forest
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8580265 0.5919831
## 100 0.8547799 0.5844959
## 150 0.8547799 0.5843957
## 200 0.8544289 0.5830623
## 250 0.8542096 0.5826708
## 300 0.8543851 0.5829630
## 350 0.8546922 0.5837050
## 400 0.8547361 0.5846706
## 450 0.8546044 0.5832432
## 500 0.8545605 0.5837993
## 550 0.8546482 0.5841542
## 600 0.8544289 0.5831200
## 650 0.8546483 0.5842098
## 700 0.8547799 0.5837997
## 750 0.8539902 0.5819200
## 800 0.8539463 0.5818111
## 850 0.8540780 0.5818868
## 900 0.8555696 0.5869136
## 950 0.8551309 0.5858845
## 1000 0.8545167 0.5837978
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa recorded during cross-validation.
adultRfFit[["resample"]]
## Accuracy Kappa Resample
## 1 0.8567856 0.5897204 Fold1
## 2 0.8585154 0.5920166 Fold3
## 3 0.8587786 0.5942124 Fold2
# Keep the per-fold accuracies (the first column of the resample table) as a
# one-column data frame.
ad_rf_fit_re <- adultRfFit[["resample"]]["Accuracy"]
# Print a component-by-component overview (length / class / mode) of the fit.
summary(adultRfFit)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 22793 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 45586 matrix numeric
## oob.times 22793 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 22793 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot restricted to 25 features for the baseline model.
vip(adultRfFit, num_features = 25) +
  ggtitle("non-TDA-Assisted: RF")

# Score the test data with the model fitted on the training data.
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)

# Cross-tabulate predicted against observed classes to assess test performance.
# NOTE(review): the printed output reports ' <=50K' as the positive class, so
# the per-class statistics refer to that class -- confirm this is intended.
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
rf_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6866 831
## >50K 550 1521
##
## Accuracy : 0.8586
## 95% CI : (0.8516, 0.8655)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5969
##
## Mcnemar's Test P-Value : 4.898e-14
##
## Sensitivity : 0.9258
## Specificity : 0.6467
## Pos Pred Value : 0.8920
## Neg Pred Value : 0.7344
## Prevalence : 0.7592
## Detection Rate : 0.7029
## Detection Prevalence : 0.7880
## Balanced Accuracy : 0.7863
##
## 'Positive' Class : <=50K
##
# Overall statistics (accuracy, kappa, CI bounds, p-values) of the test run.
rf_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.586200e-01 5.968664e-01 8.515525e-01 8.654729e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.576232e-131 4.897695e-14
# Store the overall test accuracy (the first element, named "Accuracy").
rf_cf_ov_acc <- rf_cf[["overall"]]["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
rf_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9258360 0.6466837 0.8920359
## Neg Pred Value Precision Recall
## 0.7344278 0.8920359 0.9258360
## F1 Prevalence Detection Rate
## 0.9086217 0.7592138 0.7029075
## Detection Prevalence Balanced Accuracy
## 0.7879812 0.7862599
# Keep precision, recall and F1 (elements 5 to 7 of the per-class statistics).
rf_cf_pre_rec_f1 <- rf_cf[["byClass"]][c("Precision", "Recall", "F1")]
# TDA-assisted model: PCA filter, 5 intervals, 50% overlap, 5 bins
# Node 1
#
# Fit a random forest on the node-1 subset with the shared resampling control
# and tuning grid; predictors are centered and scaled before fitting.
#
# `importance` must be lowercase: randomForest() has no `Importance` argument,
# so the capitalised spelling is silently absorbed by `...` and permutation
# importance is never computed. `TRUE` is used instead of the reassignable `T`,
# and `preProcess` is spelled out instead of relying on partial matching.
#
# NOTE(review): the "invalid mtry" warnings in the output below suggest rfGrid
# contains mtry values larger than the number of predictors -- confirm where
# rfGrid is defined.
Adult_TDA_PC_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.Preschool, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv,
## V8.Own.child, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Dominican.Republic, V14.Guatemala,
## V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica, V14.Laos,
## V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Separated, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Columbia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Widowed, V7.Priv.house.serv, V8.Own.child,
## V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands,
## V14.Honduras, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
# Display the cross-validated tuning results of the node-1 random forest fit
print(Adult_TDA_PC_5.50.5_n1_RfFit0)
## Random Forest
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3278, 3278, 3278
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9713240 0.04590433
## 100 0.9707138 0.07816387
## 150 0.9703071 0.06638083
## 200 0.9713240 0.09238626
## 250 0.9713240 0.09059836
## 300 0.9709172 0.07874975
## 350 0.9711206 0.07980320
## 400 0.9709172 0.09037011
## 450 0.9713240 0.08041228
## 500 0.9709172 0.07901827
## 550 0.9705105 0.07737894
## 600 0.9709172 0.06845593
## 650 0.9703071 0.07662230
## 700 0.9711206 0.09135968
## 750 0.9711206 0.10188522
## 800 0.9711206 0.07980320
## 850 0.9711206 0.09095599
## 900 0.9711206 0.09089212
## 950 0.9709172 0.07921732
## 1000 0.9703071 0.07636039
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_PC_5.50.5_n1_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9707138 0.068463305 Fold1
## 2 0.9713240 0.071589553 Fold3
## 3 0.9719341 -0.002339865 Fold2
# Keep the per-fold accuracies as a one-column data frame for the paired
# comparison further down.
# NOTE(review): the rows follow the order of the resample table (Fold1, Fold3,
# Fold2 in this run) -- confirm the baseline accuracies use the same fold order
# before subtracting them row by row.
ad_tda_pc_5.50.5_n1_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n1_RfFit0[["resample"]]["Accuracy"]
# List the components stored in the fitted model
summary(Adult_TDA_PC_5.50.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 4917 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 9834 matrix numeric
## oob.times 4917 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 4917 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Plot the 25 most important predictors of the node-1 fit
# (title typo "Assited" corrected to "Assisted").
vip(Adult_TDA_PC_5.50.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n1_RfFit TDA-Assisted RF")

# Predict the outcome for the held-out test data with the node-1 model
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_RfFit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of the predictions against the observed test labels
ad_tda_pc_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 43 1
## >50K 7373 2351
##
## Accuracy : 0.2451
## 95% CI : (0.2366, 0.2537)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.005798
## Specificity : 0.999575
## Pos Pred Value : 0.977273
## Neg Pred Value : 0.241773
## Prevalence : 0.759214
## Detection Rate : 0.004402
## Detection Prevalence : 0.004505
## Balanced Accuracy : 0.502687
##
## 'Positive' Class : <=50K
##
# Show the test-set confusion matrix again (same object as printed above)
print(ad_tda_pc_5.50.5_n1_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 43 1
## >50K 7373 2351
##
## Accuracy : 0.2451
## 95% CI : (0.2366, 0.2537)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0026
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.005798
## Specificity : 0.999575
## Pos Pred Value : 0.977273
## Neg Pred Value : 0.241773
## Prevalence : 0.759214
## Detection Rate : 0.004402
## Detection Prevalence : 0.004505
## Balanced Accuracy : 0.502687
##
## 'Positive' Class : <=50K
##
# Overall statistics of the test-set confusion matrix
ad_tda_pc_5.50.5_n1_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.245085995 0.002595523 0.236581718 0.253742513 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Store the test-set accuracy (first entry of the overall statistics)
ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the test-set confusion matrix
ad_tda_pc_5.50.5_n1_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.005798274 0.999574830 0.977272727
## Neg Pred Value Precision Recall
## 0.241772933 0.977272727 0.005798274
## F1 Prevalence Detection Rate
## 0.011528150 0.759213759 0.004402129
## Detection Prevalence Balanced Accuracy
## 0.004504505 0.502686552
# Keep precision, recall and F1 (entries 5 to 7 of the per-class statistics)
ad_tda_pc_5.50.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifier
### 3-fold diff
# Fold-wise accuracy difference; ad_rf_fit_re is presumably the baseline
# (non-TDA) per-fold accuracy computed earlier -- verify.
diff_tda_pca_5.50.5_rf_n1_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n1_rf_fit0_re
diff_tda_pca_5.50.5_rf_n1_3_fold
## Accuracy
## 1 -0.1139283
## 2 -0.1128086
## 3 -0.1131555
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE limits (the result
# has left / rope / right probabilities) -- confirm against the helper.
bst_tda_pca_5.50.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test; this is Inf whenever
# probRight is 0 and probLeft is positive.
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left <-
  with(bst_tda_pca_5.50.5_rf.n1_3_fold, probLeft / probRight)
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences and limits
bsr_tda_pca_5.50.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0.9909333
##
## $winRope
## [1] 0.009066667
##
## $winRight
## [1] 0
# Bayesian correlated t-test; the 0.1 is presumably the correlation parameter
# expected by the helper -- TODO confirm. The last two values are the same
# limits as in the tests above.
bct_tda_pca_5.50.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n1_3_fold
## $left
## [1] 0.9999932
##
## $rope
## [1] 2.039677e-06
##
## $right
## [1] 4.802523e-06
# ROPE plot of the fold-wise differences for the range [-0.01, 0.01]
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),
    range = c(-0.01, 0.01)
  )
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold
# One-sample t-test of the fold-wise differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold)
## t = -342.36, df = 2, p-value = 8.532e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1147213 -0.1118736
## sample estimates:
## mean of x
## -0.1132974
### Test set diff
# Difference of the two test-set accuracies (a single number); rf_cf_ov_acc is
# presumably the baseline RF test accuracy computed earlier -- verify.
diff_tda_pca_5.50.5_rf.n1_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n1_test
## Accuracy
## 0.613534
## Bayesian tests on the test-set difference
# Bayesian sign test applied to the single test-set difference
bst_tda_pca_5.50.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign test
bst_tda_pca_5.50.5_rf.n1_test_odds.left <-
  with(bst_tda_pca_5.50.5_rf.n1_test, probLeft / probRight)
bst_tda_pca_5.50.5_rf.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test applied to the single test-set difference
bsr_tda_pca_5.50.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1559667
##
## $winRight
## [1] 0.8440333
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): this returns NA for left / rope / right in this run, presumably
# because one observation has no sample variance -- confirm in the helper.
bct_tda_pca_5.50.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n1_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n1_test))
#bf_tda_pca_5.50.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n1_test))
##Node2
# Fit the node-2 random forest with caret: predictors are centred and scaled,
# mtry is tuned over rfGrid using the resampling scheme in fitControl, and the
# model with the highest accuracy is kept.
# `importance` must be lower case to reach randomForest(); the previous
# `Importance = T` was not matched to any argument, so permutation importance
# was never computed.
# NOTE(review): the run reports "invalid mtry: reset to within valid range" --
# rfGrid (defined elsewhere) appears to hold mtry values above the number of
# predictors; consider capping it where it is defined.
Adult_TDA_PC_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Guatemala, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
# Show the caret tuning/resampling summary for the n2 random-forest fit.
print(Adult_TDA_PC_5.50.5_n2_RfFit0)
## Random Forest
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8136, 8138, 8138
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.7307869 0.4603031
## 100 0.7261173 0.4503848
## 150 0.7233314 0.4450216
## 200 0.7255440 0.4493088
## 250 0.7248885 0.4478596
## 300 0.7252975 0.4487869
## 350 0.7252981 0.4488116
## 400 0.7252979 0.4488017
## 450 0.7241507 0.4463752
## 500 0.7246428 0.4471666
## 550 0.7251339 0.4483310
## 600 0.7257078 0.4496379
## 650 0.7243152 0.4468307
## 700 0.7253800 0.4490015
## 750 0.7236593 0.4456333
## 800 0.7248064 0.4478642
## 850 0.7263631 0.4508071
## 900 0.7259537 0.4500862
## 950 0.7262809 0.4507923
## 1000 0.7234957 0.4450489
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa stored on the fitted train object.
print(Adult_TDA_PC_5.50.5_n2_RfFit0[["resample"]])
## Accuracy Kappa Resample
## 1 0.7385749 0.4752125 Fold1
## 2 0.7278761 0.4534681 Fold3
## 3 0.7259095 0.4522287 Fold2
# Keep only the Accuracy column (first column above) as a one-column data
# frame; it is subtracted row-by-row from the comparison fit later on.
# NOTE(review): rows are ordered Fold1, Fold3, Fold2 -- confirm the object it
# is compared against uses the same row order.
ad_tda_pc_5.50.5_n2_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n2_RfFit0[["resample"]]["Accuracy"]
# Component listing of the final fitted model.
print(summary(Adult_TDA_PC_5.50.5_n2_RfFit0))
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 12206 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 24412 matrix numeric
## oob.times 12206 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 12206 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to 25 predictors for the n2 fit.
vip(
  Adult_TDA_PC_5.50.5_n2_RfFit0,
  num_features = 25
) +
  ggtitle("Adult_TDA_PCA_5.50.5_n2_RfFit TDA-Assited RF")

# Score the test data frame with the n2 fit. `pred0` is a scratch name that
# the later node sections overwrite.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels of the test data.
ad_tda_pc_5.50.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.50.5_n2_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1936 8
## >50K 5480 2344
##
## Accuracy : 0.4382
## 95% CI : (0.4283, 0.4481)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1436
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2611
## Specificity : 0.9966
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.2996
## Prevalence : 0.7592
## Detection Rate : 0.1982
## Detection Prevalence : 0.1990
## Balanced Accuracy : 0.6288
##
## 'Positive' Class : <=50K
##
# Second display of the same n2 confusion matrix (repeats the output above).
print(ad_tda_pc_5.50.5_n2_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1936 8
## >50K 5480 2344
##
## Accuracy : 0.4382
## 95% CI : (0.4283, 0.4481)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1436
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2611
## Specificity : 0.9966
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.2996
## Prevalence : 0.7592
## Detection Rate : 0.1982
## Detection Prevalence : 0.1990
## Balanced Accuracy : 0.6288
##
## 'Positive' Class : <=50K
##
# Overall statistics of the n2 confusion matrix.
print(ad_tda_pc_5.50.5_n2_rf_cf0[["overall"]])
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.4381654 0.1435942 0.4282945 0.4480732 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Test-set accuracy: the first entry above, kept as a named length-one vector.
ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
print(ad_tda_pc_5.50.5_n2_rf_cf0[["byClass"]])
## Sensitivity Specificity Pos Pred Value
## 0.2610572 0.9965986 0.9958848
## Neg Pred Value Precision Recall
## 0.2995910 0.9958848 0.2610572
## F1 Prevalence Detection Rate
## 0.4136752 0.7592138 0.1981982
## Detection Prevalence Balanced Accuracy
## 0.1990172 0.6288279
# Precision, Recall and F1 (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF (n2)
### 3-fold diff
# Row-wise difference of per-fold accuracies (comparison fit minus n2 fit);
# positive values mean the n2 fit scored lower on that row.
diff_tda_pca_5.50.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n2_rf_fit0_re
print(diff_tda_pca_5.50.5_rf_n2_3_fold)
## Accuracy
## 1 0.1182106
## 2 0.1306393
## 3 0.1328691
## Bayesian tests on the 3-fold differences (n2)
# Bayesian sign test; the trailing -0.01 / 0.01 are presumably the lower and
# upper ROPE bounds (the result carries probLeft / probRope / probRight).
bst_tda_pca_5.50.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.50.5_rf.n2_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n2_3_fold[["probRight"]]
print(bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the same fold differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.50.5_rf.n2_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008933333
##
## $winRight
## [1] 0.9910667
# Correlated Bayesian t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho. The resampling recorded for this fit is 3-fold, for which
# 1/3 would be the usual value -- confirm against the function definition.
bct_tda_pca_5.50.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.50.5_rf.n2_3_fold)
## $left
## [1] 0.0007344378
##
## $rope
## [1] 0.0002711296
##
## $right
## [1] 0.9989944
# ROPE plot of the fold differences with the region set to [-0.01, 0.01].
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),
    range = c(-0.01, 0.01)
  )
)

# Bayes-factor t-test (disabled)
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold
# One-sample t-test of the fold differences against a mean of zero.
print(t.test(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold)))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold)
## t = 27.902, df = 2, p-value = 0.001282
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1076188 0.1468605
## sample estimates:
## mean of x
## 0.1272397
### Test set diff
# Single test-set accuracy difference: comparison accuracy minus n2 accuracy.
diff_tda_pca_5.50.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc
print(diff_tda_pca_5.50.5_rf.n2_test)
## Accuracy
## 0.4204545
## Bayesian tests on the test-set difference (n2)
# Bayesian sign test applied to the single test-set difference, with the same
# -0.01 / 0.01 bounds used for the fold-wise tests.
bst_tda_pca_5.50.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.50.5_rf.n2_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_rf.n2_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n2_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n2_test[["probRight"]]
print(bst_tda_pca_5.50.5_rf.n2_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.50.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.50.5_rf.n2_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1556333
##
## $winRight
## [1] 0.8443667
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right, presumably
# because one observation has no sample variance -- confirm whether this test
# is meaningful here.
bct_tda_pca_5.50.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.50.5_rf.n2_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n2_test)) #bf_tda_pca_5.50.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n2_test))
## Node3
# Tune and fit a random forest (caret method "rf") on the n3 data set, with
# centering/scaling as pre-processing and Accuracy as the selection metric.
# `fitControl` and `rfGrid` are defined earlier in the document.
#
# Fix: randomForest's argument is the lower-case `importance`. The previous
# `Importance = T` was silently absorbed by `...`, which is why the recorded
# model summary shows a 108-element importance vector and a NULL importanceSD.
# Re-knit to refresh the recorded output and the importance plot.
#
# NOTE(review): the recorded "invalid mtry: reset to within valid range"
# warnings, together with the 108 predictors reported for this fit, suggest
# `rfGrid` requests mtry values above the predictor count -- confirm the grid.
Adult_TDA_PC_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Show the caret tuning/resampling summary for the n3 random-forest fit.
print(Adult_TDA_PC_5.50.5_n3_RfFit0)
## Random Forest
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8435796 0.5009515
## 100 0.8395766 0.4915433
## 150 0.8391235 0.4904715
## 200 0.8403321 0.4928573
## 250 0.8395013 0.4913444
## 300 0.8392746 0.4902961
## 350 0.8395012 0.4904205
## 400 0.8392745 0.4899087
## 450 0.8395012 0.4913642
## 500 0.8388215 0.4888537
## 550 0.8383684 0.4868825
## 600 0.8395766 0.4924767
## 650 0.8397279 0.4912020
## 700 0.8402565 0.4938746
## 750 0.8398033 0.4912664
## 800 0.8389725 0.4894692
## 850 0.8397278 0.4925311
## 900 0.8398789 0.4922070
## 950 0.8397277 0.4920199
## 1000 0.8393502 0.4911477
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa stored on the fitted train object.
print(Adult_TDA_PC_5.50.5_n3_RfFit0[["resample"]])
## Accuracy Kappa Resample
## 1 0.8391117 0.4901986 Fold1
## 2 0.8495696 0.5149349 Fold3
## 3 0.8420576 0.4977208 Fold2
# Keep only the Accuracy column (first column above) as a one-column data
# frame; it is subtracted row-by-row from the comparison fit later on.
# NOTE(review): rows are ordered Fold1, Fold3, Fold2 -- confirm the object it
# is compared against uses the same row order.
ad_tda_pc_5.50.5_n3_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n3_RfFit0[["resample"]]["Accuracy"]
# Component listing of the final fitted model.
print(summary(Adult_TDA_PC_5.50.5_n3_RfFit0))
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 13240 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 26480 matrix numeric
## oob.times 13240 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13240 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to 25 predictors for the n3 fit.
vip(
  Adult_TDA_PC_5.50.5_n3_RfFit0,
  num_features = 25
) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_RfFit TDA-Assited RF")

# Score the test data frame with the n3 fit. `pred0` is a scratch name reused
# from the previous node section.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels of the test data.
ad_tda_pc_5.50.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4827 1137
## >50K 2589 1215
##
## Accuracy : 0.6186
## 95% CI : (0.6088, 0.6282)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1383
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6509
## Specificity : 0.5166
## Pos Pred Value : 0.8094
## Neg Pred Value : 0.3194
## Prevalence : 0.7592
## Detection Rate : 0.4942
## Detection Prevalence : 0.6106
## Balanced Accuracy : 0.5837
##
## 'Positive' Class : <=50K
##
# Second display of the same n3 confusion matrix (repeats the output above).
print(ad_tda_pc_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4827 1137
## >50K 2589 1215
##
## Accuracy : 0.6186
## 95% CI : (0.6088, 0.6282)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1383
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6509
## Specificity : 0.5166
## Pos Pred Value : 0.8094
## Neg Pred Value : 0.3194
## Prevalence : 0.7592
## Detection Rate : 0.4942
## Detection Prevalence : 0.6106
## Balanced Accuracy : 0.5837
##
## 'Positive' Class : <=50K
##
# Overall statistics of the n3 confusion matrix.
print(ad_tda_pc_5.50.5_n3_rf_cf0[["overall"]])
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.185504e-01 1.383175e-01 6.088322e-01 6.281978e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 6.677915e-125
# Test-set accuracy: the first entry above, kept as a named length-one vector.
ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
print(ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]])
## Sensitivity Specificity Pos Pred Value
## 0.6508900 0.5165816 0.8093561
## Neg Pred Value Precision Recall
## 0.3194006 0.8093561 0.6508900
## F1 Prevalence Detection Rate
## 0.7215247 0.7592138 0.4941646
## Detection Prevalence Balanced Accuracy
## 0.6105651 0.5837358
# Precision, Recall and F1 (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF (n3)
### 3-fold diff
# Row-wise difference of per-fold accuracies (comparison fit minus n3 fit);
# positive values mean the n3 fit scored lower on that row.
diff_tda_pca_5.50.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n3_rf_fit0_re
print(diff_tda_pca_5.50.5_rf_n3_3_fold)
## Accuracy
## 1 0.017673858
## 2 0.008945847
## 3 0.016721069
## Bayesian tests on the 3-fold differences (n3)
# Bayesian sign test; the trailing -0.01 / 0.01 are presumably the lower and
# upper ROPE bounds (the result carries probLeft / probRope / probRight).
bst_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.50.5_rf.n3_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_3_fold[["probRight"]]
print(bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the same fold differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.50.5_rf.n3_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3009667
##
## $winRight
## [1] 0.6990333
# Correlated Bayesian t-test on the n3 fold differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho. The resampling recorded for this fit is 3-fold, for which
# 1/3 would be the usual value -- confirm against the function definition.
bct_tda_pca_5.50.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Fix: this previously printed the n2 object (copy-paste slip), so the output
# recorded directly below still shows the n2 values; re-knit to refresh it.
print(bct_tda_pca_5.50.5_rf.n3_3_fold)
## $left
## [1] 0.0007344378
##
## $rope
## [1] 0.0002711296
##
## $right
## [1] 0.9989944
# ROPE plot of the fold differences with the region set to [-0.01, 0.01].
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),
    range = c(-0.01, 0.01)
  )
)

# Bayes-factor t-test (disabled)
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold
# One-sample t-test of the fold differences against a mean of zero.
print(t.test(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)
## t = 5.2263, df = 2, p-value = 0.03472
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.002553289 0.026340560
## sample estimates:
## mean of x
## 0.01444692
### Test set diff
# Difference of the two stored overall test-set accuracies (rf_cf_ov_acc minus
# the node-3 TDA-assisted value); the result is a single named number.
diff_tda_pca_5.50.5_rf.n3_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n3_test
## Accuracy
## 0.2400696
## Bayesian tests on the test-set difference (node 3)
# Bayesian Sign Test
# NOTE(review): the input is a single accuracy difference, so this test is run
# on one observation — confirm that is intended.
bst_tda_pca_5.50.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds "left" from the test-set Bayesian sign test: probLeft divided by probRight.
# The ratio is Inf when probRight is 0 and NaN when both components are 0.
bst_tda_pca_5.50.5_rf.n3_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n3_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n3_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference (node 3), called with the
# same -0.01 / 0.01 pair as the sign test.
bsr_tda_pca_5.50.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1568667
##
## $winRight
## [1] 0.8431333
# Bayesian Correlated Test on the test-set difference (node 3)
# NOTE(review): the input is a single value and the rendered result is NA for
# left, rope and right — this test appears to need more than one observation;
# consider dropping it for the test-set comparison.
bct_tda_pca_5.50.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n3_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n3_test))
#bf_tda_pca_5.50.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n3_test))
##Node4
# Fit the TDA-assisted random forest for node 4 with train(): method "rf",
# resampling taken from fitControl, tuning grid taken from rfGrid, predictors
# centred and scaled, model selected on Accuracy.
# Fix: the original passed `Importance = T`. R argument names are case-sensitive,
# so that value never reached randomForest's `importance` argument (the model
# summary reports importanceSD as NULL). Spelled `importance = TRUE`, the forest
# also computes permutation importance, which the variable-importance plot can
# then use. `T` is replaced by `TRUE` and `preProc` is spelled out in full.
# NOTE(review): the fitted model reports 108 predictors while the tuning results
# list mtry values up to 1000, which is what triggers the repeated
# "invalid mtry: reset to within valid range" warnings — consider capping the
# grid where rfGrid is defined.
Adult_TDA_PC_5.50.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_PC_5.50.5_n4_RfFit0
## Random Forest
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11134, 11133, 11133
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9517965 0.3550857
## 100 0.9519760 0.3657386
## 150 0.9515569 0.3625767
## 200 0.9514971 0.3585936
## 250 0.9517366 0.3591983
## 300 0.9514371 0.3635162
## 350 0.9517365 0.3617620
## 400 0.9516168 0.3629410
## 450 0.9516767 0.3660076
## 500 0.9515569 0.3616388
## 550 0.9513773 0.3599246
## 600 0.9514372 0.3593316
## 650 0.9514371 0.3602456
## 700 0.9517965 0.3630400
## 750 0.9518563 0.3640973
## 800 0.9512575 0.3598879
## 850 0.9519162 0.3643928
## 900 0.9513773 0.3579457
## 950 0.9513773 0.3598592
## 1000 0.9514970 0.3638851
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 100.
Adult_TDA_PC_5.50.5_n4_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9518505 0.3617923 Fold1
## 2 0.9538351 0.4020191 Fold3
## 3 0.9502425 0.3334044 Fold2
# Keep only the Accuracy column of the per-fold resampling table (the first
# column, as a one-column data frame) for the fold-wise comparison.
# NOTE(review): the resample table printed just above lists the folds as Fold1,
# Fold3, Fold2, and the fold label is dropped here; the later row-wise
# subtraction pairs rows by position — confirm the other table uses the same
# fold order.
ad_tda_pc_5.50.5_n4_rf_fit0_re <- Adult_TDA_PC_5.50.5_n4_RfFit0$resample["Accuracy"]
# Summary of the fitted model object
summary(Adult_TDA_PC_5.50.5_n4_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 16700 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 33400 matrix numeric
## oob.times 16700 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 16700 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-4 TDA-assisted RF, limited to 25
# features. Fix: the plot title misspelled "Assisted" as "Assited"; the feature
# count is now passed by name instead of by position.
vip(Adult_TDA_PC_5.50.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_RfFit TDA-Assisted RF")

# Score the test data with the node-4 model fitted on the training data
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels, used to
# assess model fit/performance on the test data
ad_tda_pc_5.50.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 1657
## >50K 2 695
##
## Accuracy : 0.8302
## 95% CI : (0.8226, 0.8376)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3886
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9997
## Specificity : 0.2955
## Pos Pred Value : 0.8173
## Neg Pred Value : 0.9971
## Prevalence : 0.7592
## Detection Rate : 0.7590
## Detection Prevalence : 0.9286
## Balanced Accuracy : 0.6476
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion-matrix object; nothing is
# reassigned since the previous print and the rendered output is identical.
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 1657
## >50K 2 695
##
## Accuracy : 0.8302
## 95% CI : (0.8226, 0.8376)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3886
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9997
## Specificity : 0.2955
## Pos Pred Value : 0.8173
## Neg Pred Value : 0.9971
## Prevalence : 0.7592
## Detection Rate : 0.7590
## Detection Prevalence : 0.9286
## Balanced Accuracy : 0.6476
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.301597e-01 3.885773e-01 8.225640e-01 8.375580e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.290367e-65 0.000000e+00
# Store the overall test-set accuracy: the first entry of $overall, which is
# the one named "Accuracy".
ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_rf_cf0$overall["Accuracy"]
# Show the per-class statistics of the confusion matrix
ad_tda_pc_5.50.5_n4_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9997303 0.2954932 0.8173300
## Neg Pred Value Precision Recall
## 0.9971306 0.8173300 0.9997303
## F1 Prevalence Detection Rate
## 0.8993753 0.7592138 0.7590090
## Detection Prevalence Balanced Accuracy
## 0.9286446 0.6476118
# Store Precision, Recall and F1 (entries 5 to 7 of $byClass), selected by name
ad_tda_pc_5.50.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF (node 4)
### 3-fold diff
# Row-wise difference of the two per-fold accuracy tables (non-TDA minus
# TDA-assisted); a negative row means the TDA-assisted RF scored higher.
diff_tda_pca_5.50.5_rf_n4_3_fold <- ad_rf_fit_re - ad_tda_pc_5.50.5_n4_rf_fit0_re
diff_tda_pca_5.50.5_rf_n4_3_fold
## Accuracy
## 1 -0.09506495
## 2 -0.09531970
## 3 -0.09146387
## Bayesian tests on the 3-fold differences (node 4)
# Bayesian Sign Test
# -0.01 and 0.01 are passed as the second and third arguments; the same pair is
# used as the ROPE range in the rope() call further down (presumably the ROPE
# bounds here too — confirm against the function's definition).
bst_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds "left" from the Bayesian sign test: probLeft divided by probRight.
# The ratio is Inf when probRight is 0 (as in the rendered result below) and
# NaN when both components are 0.
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the 3-fold differences (node 4), called with the
# same -0.01 / 0.01 pair as the sign test.
bsr_tda_pca_5.50.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0.9912333
##
## $winRope
## [1] 0.008766667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the 3-fold differences (node 4)
# 0.1 is passed as the second argument and -0.01 / 0.01 as the third and fourth
# (presumably the fold correlation and the ROPE bounds — confirm against the
# function's definition).
bct_tda_pca_5.50.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_3_fold
## $left
## [1] 0.9998534
##
## $rope
## [1] 5.095638e-05
##
## $right
## [1] 9.560297e-05
# Rope Plot: plot the rope() result for the 3-fold differences, range -0.01..0.01
plot(
  rope(
    as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),
    c(-0.01, 0.01)
  )
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold
# One-sample t-test of the 3-fold differences (default null: mean equal to 0)
t.test(
  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold)
## t = -75.462, df = 2, p-value = 0.0001756
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.09930627 -0.08859275
## sample estimates:
## mean of x
## -0.09394951
### Test set diff
# Difference of the two stored overall test-set accuracies (rf_cf_ov_acc minus
# the node-4 TDA-assisted value); the result is a single named number.
diff_tda_pca_5.50.5_rf.n4_test <- rf_cf_ov_acc - ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n4_test
## Accuracy
## 0.02846028
## Bayesian tests on the test-set difference (node 4)
# Bayesian Sign Test
# NOTE(review): the input is a single accuracy difference, so this test is run
# on one observation — confirm that is intended.
bst_tda_pca_5.50.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_rf.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds "left" from the test-set Bayesian sign test: probLeft divided by probRight.
# The ratio is Inf when probRight is 0 and NaN when both components are 0.
bst_tda_pca_5.50.5_rf.n4_test_odds.left <-
  bst_tda_pca_5.50.5_rf.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_rf.n4_test[["probRight"]]
bst_tda_pca_5.50.5_rf.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference (node 4), called with the
# same -0.01 / 0.01 pair as the sign test.
bsr_tda_pca_5.50.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_rf.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1595667
##
## $winRight
## [1] 0.8404333
# Bayesian Correlated Test on the test-set difference (node 4)
# NOTE(review): the input is a single value and the rendered result is NA for
# left, rope and right — this test appears to need more than one observation;
# consider dropping it for the test-set comparison.
bct_tda_pca_5.50.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n4_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n4_test))
#bf_tda_pca_5.50.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n4_test))
##Node5
# Fit the TDA-assisted random forest for node 5 with train(): method "rf",
# resampling taken from fitControl, tuning grid taken from rfGrid, predictors
# centred and scaled, model selected on Accuracy.
# Fix: the original passed `Importance = T`. R argument names are case-sensitive,
# so that value never reached randomForest's `importance` argument. Spelled
# `importance = TRUE`, the forest also computes permutation importance. `T` is
# replaced by `TRUE` and `preProc` is spelled out in full.
# NOTE(review): this fit emits repeated "invalid mtry: reset to within valid
# range" warnings and the fitted model reports 108 predictors — the mtry values
# in rfGrid look too large for this data; consider capping the grid where
# rfGrid is defined.
Adult_TDA_PC_5.50.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Greece,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
Adult_TDA_PC_5.50.5_n5_RfFit0
## Random Forest
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9602, 9602, 9604
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9981949 0.2132988
## 100 0.9980561 0.1972965
## 150 0.9980561 0.1972965
## 200 0.9979867 0.1908911
## 250 0.9979867 0.1908911
## 300 0.9979867 0.1908911
## 350 0.9980561 0.1972965
## 400 0.9979867 0.1908911
## 450 0.9979173 0.1439117
## 500 0.9980561 0.1972965
## 550 0.9981255 0.2375846
## 600 0.9981949 0.2449723
## 650 0.9980561 0.1972965
## 700 0.9980561 0.1972965
## 750 0.9979867 0.1908911
## 800 0.9979867 0.1908911
## 850 0.9980561 0.2311793
## 900 0.9979867 0.1908911
## 950 0.9981255 0.2375846
## 1000 0.9980561 0.1972965
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_PC_5.50.5_n5_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9983340 0.3328702 Fold1
## 2 0.9981250 0.0000000 Fold3
## 3 0.9981258 0.3070263 Fold2
# Keep the per-fold accuracy as a one-column data frame; it is subtracted
# from the baseline fold accuracies further down.
ad_tda_pc_5.50.5_n5_rf_fit0_re <-
  Adult_TDA_PC_5.50.5_n5_RfFit0[["resample"]]["Accuracy"]
# List the components stored in the fitted object.
summary(Adult_TDA_PC_5.50.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 14404 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 28808 matrix numeric
## oob.times 14404 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 14404 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to the 25 top-ranked predictors.
vip(Adult_TDA_PC_5.50.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test set with Adult_TDA_PC_5.50.5_n5_RfFit0.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.50.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 2100
## >50K 3 252
##
## Accuracy : 0.7847
## 95% CI : (0.7764, 0.7928)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.314e-09
##
## Kappa : 0.1534
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9996
## Specificity : 0.1071
## Pos Pred Value : 0.7792
## Neg Pred Value : 0.9882
## Prevalence : 0.7592
## Detection Rate : 0.7589
## Detection Prevalence : 0.9739
## Balanced Accuracy : 0.5534
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 2100
## >50K 3 252
##
## Accuracy : 0.7847
## 95% CI : (0.7764, 0.7928)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.314e-09
##
## Kappa : 0.1534
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9996
## Specificity : 0.1071
## Pos Pred Value : 0.7792
## Neg Pred Value : 0.9882
## Prevalence : 0.7592
## Detection Rate : 0.7589
## Detection Prevalence : 0.9739
## Balanced Accuracy : 0.5534
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.847052e-01 1.534495e-01 7.764186e-01 7.928216e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.313670e-09 0.000000e+00
# Test-set accuracy, kept as a named length-one vector.
ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_rf_cf0[["overall"]]["Accuracy"]
# Per-class metrics of the confusion matrix.
ad_tda_pc_5.50.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9995955 0.1071429 0.7792494
## Neg Pred Value Precision Recall
## 0.9882353 0.7792494 0.9995955
## F1 Prevalence Detection Rate
## 0.8757753 0.7592138 0.7589066
## Detection Prevalence Balanced Accuracy
## 0.9738943 0.5533692
# Precision, recall and F1 (entries 5 to 7 of byClass).
ad_tda_pc_5.50.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Element-wise difference of the per-fold accuracies: non-TDA RF minus
# TDA-assisted RF, so negative entries favour the TDA-assisted forest.
# NOTE(review): rows are paired by position; confirm that both resample
# tables list the folds in the same order.
diff_tda_pca_5.50.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.50.5_n5_rf_fit0_re
diff_tda_pca_5.50.5_rf_n5_3_fold
## Accuracy
## 1 -0.1415485
## 2 -0.1396096
## 3 -0.1393472
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are the ROPE bounds.
bst_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold), -0.01, 0.01
)
bst_tda_pca_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the left to the right posterior probability of the sign test.
# When probRight is 0 the division evaluates to Inf (or to NaN if probLeft
# is 0 as well), so check the result with is.finite() before reusing it.
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n5_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n5_3_fold$probRight
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold differences, same ROPE bounds.
bsr_tda_pca_5.50.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.9914
##
## $winRope
## [1] 0.0086
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; confirm against the function definition.
bct_tda_pca_5.50.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_rf.n5_3_fold
## $left
## [1] 0.999981
##
## $rope
## [1] 4.713479e-06
##
## $right
## [1] 1.424516e-05
# ROPE plot of the fold differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_rf_n5_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold
# One-sample t-test of the fold differences against a true mean of 0.
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold)
## t = -201.92, df = 2, p-value = 2.453e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1431552 -0.1371816
## sample estimates:
## mean of x
## -0.1401684
### Test set diff
# Test-set accuracy of the non-TDA RF minus that of the TDA-assisted RF;
# a positive value means the non-TDA forest scored higher on the test set.
diff_tda_pca_5.50.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.50.5_rf.n5_test
## Accuracy
## 0.07391482
## Bayesian Tests Test set diff
# NOTE(review): the test-set difference is a single number, so the tests in
# this section are run on one observation only; interpret them with care.
# Bayesian Sign Test
bst_tda_pca_5.50.5_rf.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the left to the right posterior probability of the sign test;
# it becomes Inf or NaN whenever probRight is 0.
bst_tda_pca_5.50.5_rf.n5_test_odds.left<-bst_tda_pca_5.50.5_rf.n5_test$probLeft/bst_tda_pca_5.50.5_rf.n5_test$probRight
bst_tda_pca_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same ROPE bounds.
bsr_tda_pca_5.50.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_rf.n5_test), -0.01, 0.01
)
bsr_tda_pca_5.50.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.157
##
## $winRight
## [1] 0.843
# Bayesian Correlated Test
# NOTE(review): the input holds a single difference and the call returns NA
# for left/rope/right, presumably because no spread can be estimated from
# one value; the result carries no information.
bct_tda_pca_5.50.5_rf.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n5_test)) #bf_tda_pca_5.50.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n5_test))
## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# Tune and fit a random forest through caret on the node-1 data, with
# centring and scaling applied as pre-processing and accuracy as the
# selection metric.
# `importance = TRUE` is forwarded by train() to randomForest(). R argument
# names are case-sensitive, so a capitalised `Importance` is silently
# swallowed and no permutation importance is computed.
# NOTE(review): rfGrid (defined elsewhere) contains mtry values above the
# 108 predictors; randomForest resets those with an "invalid mtry" warning,
# so many grid rows fit the same model.
Adult_TDA_KDE_5.50.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
Adult_TDA_KDE_5.50.5_n1_RfFit0
## Random Forest
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8924, 8925, 8925
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8613578 0.6262153
## 100 0.8607601 0.6272384
## 150 0.8587431 0.6220310
## 200 0.8588180 0.6215913
## 250 0.8603865 0.6252692
## 300 0.8599383 0.6250645
## 350 0.8591165 0.6222775
## 400 0.8598638 0.6249253
## 450 0.8598637 0.6247009
## 500 0.8594154 0.6235399
## 550 0.8605360 0.6259093
## 600 0.8605360 0.6259602
## 650 0.8589673 0.6226026
## 700 0.8583697 0.6209440
## 750 0.8603120 0.6255933
## 800 0.8581457 0.6193261
## 850 0.8591913 0.6226179
## 900 0.8583698 0.6200272
## 950 0.8593408 0.6233352
## 1000 0.8603865 0.6254018
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
Adult_TDA_KDE_5.50.5_n1_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8648891 0.6383603 Fold1
## 2 0.8623935 0.6307766 Fold3
## 3 0.8567907 0.6095090 Fold2
# Keep the per-fold accuracy as a one-column data frame; it is subtracted
# from the baseline fold accuracies further down.
ad_tda_kde_5.50.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n1_RfFit0[["resample"]]["Accuracy"]
# List the components stored in the fitted object.
summary(Adult_TDA_KDE_5.50.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 13387 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 26774 matrix numeric
## oob.times 13387 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13387 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to the 25 top-ranked predictors.
vip(Adult_TDA_KDE_5.50.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_RfFit TDA-Assisted RF")

# Score the held-out test set with Adult_TDA_KDE_5.50.5_n1_RfFit0.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_kde_5.50.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7126 534
## >50K 290 1818
##
## Accuracy : 0.9156
## 95% CI : (0.91, 0.9211)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7608
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9609
## Specificity : 0.7730
## Pos Pred Value : 0.9303
## Neg Pred Value : 0.8624
## Prevalence : 0.7592
## Detection Rate : 0.7295
## Detection Prevalence : 0.7842
## Balanced Accuracy : 0.8669
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7126 534
## >50K 290 1818
##
## Accuracy : 0.9156
## 95% CI : (0.91, 0.9211)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7608
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9609
## Specificity : 0.7730
## Pos Pred Value : 0.9303
## Neg Pred Value : 0.8624
## Prevalence : 0.7592
## Detection Rate : 0.7295
## Detection Prevalence : 0.7842
## Balanced Accuracy : 0.8669
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.156429e-01 7.608020e-01 9.099555e-01 9.210811e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 2.554808e-17
# Test-set accuracy, kept as a named length-one vector.
ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_rf_cf0[["overall"]]["Accuracy"]
# Per-class metrics of the confusion matrix.
ad_tda_kde_5.50.5_n1_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9608954 0.7729592 0.9302872
## Neg Pred Value Precision Recall
## 0.8624288 0.9302872 0.9608954
## F1 Prevalence Detection Rate
## 0.9453436 0.7592138 0.7295250
## Detection Prevalence Balanced Accuracy
## 0.7841933 0.8669273
# Precision, recall and F1 (entries 5 to 7 of byClass).
ad_tda_kde_5.50.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Element-wise difference of the per-fold accuracies: non-TDA RF minus
# TDA-assisted RF, so negative entries favour the TDA-assisted forest.
# NOTE(review): rows are paired by position; confirm that both resample
# tables list the folds in the same order.
diff_tda_kde_5.50.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n1_rf_fit0_re
diff_tda_kde_5.50.5_rf_n1_3_fold
## Accuracy
## 1 -0.008103515
## 2 -0.003878147
## 3 0.001987949
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are the ROPE bounds.
bst_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the left to the right posterior probability of the sign test.
# When probRight is 0 the division evaluates to Inf (or to NaN if probLeft
# is 0 as well), so check the result with is.finite() before reusing it.
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n1_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n1_3_fold$probRight
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the fold differences, same ROPE bounds.
bsr_tda_kde_5.50.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; confirm against the function definition.
bct_tda_kde_5.50.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_rf.n1_3_fold
## $left
## [1] 0.09355665
##
## $rope
## [1] 0.8771248
##
## $right
## [1] 0.02931854
# ROPE plot of the fold differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_rf_n1_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
#bf_tda_kde_5.50.5_rf.n1_3_fold
# One-sample t-test of the fold differences against a true mean of 0.
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold)
## t = -1.1385, df = 2, p-value = 0.3729
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.015920632 0.009258157
## sample estimates:
## mean of x
## -0.003331237
### Test set diff
# Test-set accuracy of the non-TDA RF minus that of the TDA-assisted RF;
# a negative value means the TDA-assisted forest scored higher on the test set.
diff_tda_kde_5.50.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n1_test
## Accuracy
## -0.05702293
## Bayesian Tests Test set diff
# NOTE(review): the test-set difference is a single number, so the tests in
# this section are run on one observation only; interpret them with care.
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the left to the right posterior probability of the sign test;
# it becomes Inf or NaN whenever probRight is 0.
bst_tda_kde_5.50.5_rf.n1_test_odds.left<-bst_tda_kde_5.50.5_rf.n1_test$probLeft/bst_tda_kde_5.50.5_rf.n1_test$probRight
bst_tda_kde_5.50.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, same ROPE bounds.
bsr_tda_kde_5.50.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n1_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_rf.n1_test
## $winLeft
## [1] 0.8407667
##
## $winRope
## [1] 0.1592333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input holds a single difference and the call returns NA
# for left/rope/right, presumably because no spread can be estimated from
# one value; the result carries no information.
bct_tda_kde_5.50.5_rf.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n1_test)) #bf_tda_kde_5.50.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n1_test))
##Node2
# Random forest fitted with caret on the node-2 data
# (tda.m_kde_adult_5.50.5.n2.vec): response as.factor(adult_df1), all other
# columns as predictors, centred and scaled, tuned over rfGrid under the
# resampling scheme in fitControl, selecting on Accuracy.
#
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# The previous spelling `Importance` does not match that argument (R is
# case-sensitive), so it was silently ignored and no permutation importance
# was computed (summary() of the fit reports importanceSD as NULL).
#
# NOTE(review): the "invalid mtry: reset to within valid range" warnings mean
# rfGrid contains mtry values larger than the number of predictors (the fit
# reports 108); those grid points are reset by randomForest — consider capping
# rfGrid where it is defined.
Adult_TDA_KDE_5.50.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret fit: resampled Accuracy / Kappa per mtry value and the
# mtry chosen for the final model.
Adult_TDA_KDE_5.50.5_n2_RfFit0
## Random Forest
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8425, 8425, 8426
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8440416 0.5990016
## 100 0.8415096 0.5932256
## 150 0.8411932 0.5934107
## 200 0.8433295 0.5982069
## 250 0.8432504 0.5982755
## 300 0.8423008 0.5957367
## 350 0.8417470 0.5947305
## 400 0.8430922 0.5978009
## 450 0.8426174 0.5965623
## 500 0.8409557 0.5928548
## 550 0.8413516 0.5928332
## 600 0.8434878 0.5977601
## 650 0.8423008 0.5954840
## 700 0.8418261 0.5946200
## 750 0.8413514 0.5935380
## 800 0.8414303 0.5934740
## 850 0.8432504 0.5981819
## 900 0.8416677 0.5943368
## 950 0.8441208 0.6002420
## 1000 0.8419052 0.5948442
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 950.
# Per-fold Accuracy / Kappa of the selected model.
Adult_TDA_KDE_5.50.5_n2_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8469024 0.6054938 Fold1
## 2 0.8428300 0.5938551 Fold3
## 3 0.8426300 0.6013771 Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
ad_tda_KDE_5.50.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n2_RfFit0$resample["Accuracy"]
# Structure of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n2_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 12638 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 25276 matrix numeric
## oob.times 12638 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 12638 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-2 fit, limited to 25 features.
# Title typo fixed: "TDA-Assited" -> "TDA-Assisted".
vip(Adult_TDA_KDE_5.50.5_n2_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_RfFit TDA-Assisted RF")

# Score the rows of adult.one_hot_df4Test (the testing data) with the
# node-2 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed adult_df1 labels.
# NOTE(review): the test accuracy printed below (0.9126) is well above the
# 3-fold CV accuracy of the same model (about 0.844) — confirm the test rows
# do not overlap the data the node-2 model was trained on.
ad_tda_kde_5.50.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7116 554
## >50K 300 1798
##
## Accuracy : 0.9126
## 95% CI : (0.9068, 0.9181)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7517
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9595
## Specificity : 0.7645
## Pos Pred Value : 0.9278
## Neg Pred Value : 0.8570
## Prevalence : 0.7592
## Detection Rate : 0.7285
## Detection Prevalence : 0.7852
## Balanced Accuracy : 0.8620
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object is
# unchanged since the print directly above, so this line looks redundant.
ad_tda_kde_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7116 554
## >50K 300 1798
##
## Accuracy : 0.9126
## 95% CI : (0.9068, 0.9181)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7517
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9595
## Specificity : 0.7645
## Pos Pred Value : 0.9278
## Neg Pred Value : 0.8570
## Prevalence : 0.7592
## Detection Rate : 0.7285
## Detection Prevalence : 0.7852
## Balanced Accuracy : 0.8620
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix.
ad_tda_kde_5.50.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.125717e-01 7.517197e-01 9.067952e-01 9.181007e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 4.823006e-18
# Keep the test-set accuracy (first element of $overall, selected by name).
ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_rf_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.50.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9595469 0.7644558 0.9277705
## Neg Pred Value Precision Recall
## 0.8570067 0.9277705 0.9595469
## F1 Prevalence Detection Rate
## 0.9433912 0.7592138 0.7285012
## Detection Prevalence Balanced Accuracy
## 0.7852170 0.8620014
# Precision, Recall and F1 are elements 5 to 7 of $byClass; select them by name.
ad_tda_kde_5.50.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff: ad_rf_fit_re minus the node-2 per-fold accuracies.
# NOTE(review): the subtraction appears to pair rows by position, and the
# node-2 resample table is listed as Fold1, Fold3, Fold2 — confirm that
# ad_rf_fit_re uses the same fold order.
diff_tda_kde_5.50.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.50.5_n2_rf_fit0_re
diff_tda_kde_5.50.5_rf_n2_3_fold
## Accuracy
## 1 0.009883128
## 2 0.015685389
## 3 0.016148671
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test: probLeft / probRight.
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3033667
##
## $winRight
## [1] 0.6966333
# Correlated Bayesian t-test on the node-2 per-fold accuracy differences.
# Positional arguments after the data: 0.1 (presumably the assumed correlation
# between folds — confirm against the function definition), then the
# -0.01 / 0.01 bounds.
bct_tda_kde_5.50.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n2_3_fold
## $left
## [1] 0.004673605
##
## $rope
## [1] 0.112999
##
## $right
## [1] 0.8823274
# ROPE plot for the node-2 3-fold differences, region [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_rf_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
#bf_tda_kde_5.50.5_rf.n2_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold)
## t = 6.8986, df = 2, p-value = 0.02037
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.005232692 0.022578767
## sample estimates:
## mean of x
## 0.01390573
### Test-set accuracy difference: rf_cf_ov_acc minus the node-2 TDA-assisted
### RF accuracy. rf_cf_ov_acc is presumably the non-TDA RF test accuracy —
### confirm where it is defined.
diff_tda_kde_5.50.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n2_test
## Accuracy
## -0.05395168
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 / 0.01.
# NOTE(review): the input is a single accuracy difference rather than a vector
# of paired results — confirm the test is meaningful for one observation.
bst_tda_kde_5.50.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test: probLeft / probRight.
# The ratio is Inf when probRight is 0 and probLeft is positive.
bst_tda_kde_5.50.5_rf.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n2_test
## $winLeft
## [1] 0.841
##
## $winRope
## [1] 0.159
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference (same 0.1 and
# -0.01 / 0.01 arguments as the 3-fold call).
# NOTE(review): every component prints as NA. The input is a single value, so
# presumably its spread is undefined — confirm, and consider disabling this
# call like the plot / Bayes-factor / t-test lines that follow it.
bct_tda_kde_5.50.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n2_test))
#bf_tda_kde_5.50.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n2_test))
##Node3
# Tune a caret random forest on the node-3 TDA/KDE training subset, using the
# shared resampling control (fitControl) and mtry grid (rfGrid) defined
# elsewhere in the file. Predictors are centred and scaled before fitting.
#
# NOTE(review): the recorded run reports 108 predictors while the grid goes up
# to mtry = 1000, which is what produces the repeated "invalid mtry: reset to
# within valid range" warnings; consider capping rfGrid at the predictor count.
Adult_TDA_KDE_5.50.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  # randomForest's argument is lower-case `importance`. The earlier
  # `Importance = T` was absorbed by `...` and ignored, so permutation
  # importance was never computed (the recorded summary shows importanceSD as
  # NULL).
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  # Full argument name instead of relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Show the cross-validated accuracy/kappa for every mtry candidate and the
# value that was selected.
print(Adult_TDA_KDE_5.50.5_n3_RfFit0)
## Random Forest
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7755, 7756, 7757
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8341064 0.5700683
## 100 0.8305822 0.5610165
## 150 0.8307545 0.5623634
## 200 0.8306685 0.5623742
## 250 0.8320439 0.5661058
## 300 0.8308407 0.5626333
## 350 0.8304103 0.5614980
## 400 0.8299810 0.5610624
## 450 0.8295513 0.5595772
## 500 0.8307545 0.5619569
## 550 0.8321296 0.5653346
## 600 0.8311842 0.5632212
## 650 0.8296370 0.5592986
## 700 0.8323879 0.5669252
## 750 0.8312704 0.5637204
## 800 0.8312702 0.5629077
## 850 0.8313561 0.5640040
## 900 0.8317860 0.5661745
## 950 0.8322156 0.5662088
## 1000 0.8304966 0.5614328
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected node-3 model.
Adult_TDA_KDE_5.50.5_n3_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8339778 0.5695067 Fold1
## 2 0.8277018 0.5506721 Fold3
## 3 0.8406395 0.5900261 Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison.
# NOTE(review): the rows above are ordered Fold1, Fold3, Fold2; the later
# subtraction presumably pairs rows by position -- confirm the other operand
# uses the same fold order.
ad_tda_kde_5.50.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n3_RfFit0[["resample"]]["Accuracy"]
# Component overview of the fitted object.
summary(Adult_TDA_KDE_5.50.5_n3_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 11634 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 23268 matrix numeric
## oob.times 11634 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11634 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-3 model, limited to 25 features.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_KDE_5.50.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n3_RfFit TDA-Assisted RF")

# Score the test data with the node-3 model fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels to assess
# performance on the test data.
ad_tda_kde_5.50.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7115 558
## >50K 301 1794
##
## Accuracy : 0.9121
## 95% CI : (0.9063, 0.9176)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7502
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9594
## Specificity : 0.7628
## Pos Pred Value : 0.9273
## Neg Pred Value : 0.8563
## Prevalence : 0.7592
## Detection Rate : 0.7284
## Detection Prevalence : 0.7855
## Balanced Accuracy : 0.8611
##
## 'Positive' Class : <=50K
##
# Second display of the node-3 confusion matrix; the object has not been
# reassigned since it was created.
print(ad_tda_kde_5.50.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7115 558
## >50K 301 1794
##
## Accuracy : 0.9121
## 95% CI : (0.9063, 0.9176)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7502
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9594
## Specificity : 0.7628
## Pos Pred Value : 0.9273
## Neg Pred Value : 0.8563
## Prevalence : 0.7592
## Detection Rate : 0.7284
## Detection Prevalence : 0.7855
## Balanced Accuracy : 0.8611
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-3 confusion matrix.
ad_tda_kde_5.50.5_n3_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.120598e-01 7.501531e-01 9.062687e-01 9.176038e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 2.445041e-18
# Overall accuracy (the first entry above), kept as a named value.
ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.50.5_n3_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9594121 0.7627551 0.9272775
## Neg Pred Value Precision Recall
## 0.8563246 0.9272775 0.9594121
## F1 Prevalence Detection Rate
## 0.9430711 0.7592138 0.7283989
## Detection Prevalence Balanced Accuracy
## 0.7855242 0.8610836
# Precision, recall and F1 (entries 5 to 7 of the listing above).
ad_tda_kde_5.50.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
# Initial Bayesian tests of the non-tda-assisted RF vs. the tda-assisted RF.
# Fold-wise accuracy difference: ad_rf_fit_re minus the node-3 per-fold
# accuracies, so positive entries favour the first operand.
diff_tda_kde_5.50.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n3_rf_fit0_re
diff_tda_kde_5.50.5_rf_n3_3_fold
## Accuracy
## 1 0.02280774
## 2 0.03081357
## 3 0.01813912
# Bayesian comparisons on the three node-3 fold-wise accuracy differences.
# BayesianSignTest, BayesianSignedRank and correlatedBayesianTtest are defined
# outside this section; -0.01 and 0.01 are presumably the ROPE limits -- confirm.

# Sign test
bst_tda_kde_5.50.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n3_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test.
bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n3_3_fold[["probRight"]]
print(bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left)
## [1] 0
# Signed-rank test
bsr_tda_kde_5.50.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n3_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0376
##
## $winRight
## [1] 0.9624
# Correlated t-test.
# NOTE(review): the second argument (0.1) looks like the fold-correlation rho;
# the fit above used 3-fold cross-validation, for which the usual 1/k heuristic
# would give 1/3 -- confirm against the function definition before changing it.
bct_tda_kde_5.50.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_rf.n3_3_fold)
## $left
## [1] 0.007751756
##
## $rope
## [1] 0.03360722
##
## $right
## [1] 0.958641
# ROPE summary of the node-3 fold differences, plotted with limits -0.01 / 0.01.
plot(
  rope(diff_tda_kde_5.50.5_rf_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (left disabled)
#bf_tda_kde_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
#bf_tda_kde_5.50.5_rf.n3_3_fold

# Frequentist one-sample t-test of the same differences against a mean of 0.
t.test(x = as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold)
## t = 6.4635, df = 2, p-value = 0.02311
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.007996749 0.039843539
## sample estimates:
## mean of x
## 0.02392014
# Test-set accuracy gap: rf_cf_ov_acc minus the node-3 TDA/KDE model's overall
# accuracy. A negative value means the node-3 model scored higher.
# NOTE(review): rf_cf_ov_acc is presumably the non-TDA-assisted RF -- confirm.
diff_tda_kde_5.50.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc
print(diff_tda_kde_5.50.5_rf.n3_test)
## Accuracy
## -0.0534398
# Bayesian comparisons on the single node-3 test-set accuracy difference.
# BayesianSignTest, BayesianSignedRank and correlatedBayesianTtest are defined
# outside this section; -0.01 and 0.01 are presumably the ROPE limits -- confirm.

# Sign test
bst_tda_kde_5.50.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_rf.n3_test)
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test. probRight is 0 in the
# recorded output above, so the ratio comes out as Inf.
bst_tda_kde_5.50.5_rf.n3_test_odds.left <-
  bst_tda_kde_5.50.5_rf.n3_test[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n3_test[["probRight"]]
print(bst_tda_kde_5.50.5_rf.n3_test_odds.left)
## [1] Inf
# Signed-rank test
bsr_tda_kde_5.50.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_rf.n3_test)
## $winLeft
## [1] 0.8400333
##
## $winRope
## [1] 0.1599667
##
## $winRight
## [1] 0
# Correlated t-test. The recorded result is NA for left, rope and right.
# NOTE(review): presumably because a spread cannot be estimated from a single
# difference -- confirm against the function definition.
bct_tda_kde_5.50.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_rf.n3_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n3_test)) #bf_tda_kde_5.50.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n3_test))
##Node4
# Tune and fit a caret random forest ("rf") on tda.m_kde_adult_5.50.5.n4.vec,
# with adult_df1 (coerced to a factor) as the outcome and every other column
# as a predictor. Predictors are centred and scaled, and the mtry candidates
# in rfGrid are compared on accuracy under the resampling set up in fitControl.
#
# The original call also passed `Importance = T`. R matches argument names
# case-sensitively, so it never reached randomForest()'s `importance`
# argument: it fell into `...` and was ignored (the model summary shows
# importanceSD as NULL). The dead argument is dropped so the call states what
# actually runs; fitted results are unchanged.
# NOTE(review): if permutation importance is wanted, add `importance = TRUE`
# and re-check the vip() plot, since the importance table changes shape.
#
# `preProc` is spelled out as `preProcess` to avoid partial argument matching.
#
# NOTE(review): this call warns "invalid mtry: reset to within valid range";
# rfGrid (defined elsewhere) appears to contain mtry values larger than the
# number of predictors in this data -- confirm the grid.
Adult_TDA_KDE_5.50.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
# Show the tuning results for the node-4 fit.
Adult_TDA_KDE_5.50.5_n4_RfFit0
## Random Forest
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6692, 6692, 6692
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8526599 0.5213723
## 100 0.8485754 0.5116948
## 150 0.8487747 0.5125076
## 200 0.8479777 0.5119375
## 250 0.8483762 0.5112452
## 300 0.8483762 0.5119849
## 350 0.8490735 0.5144138
## 400 0.8476788 0.5090453
## 450 0.8487747 0.5130535
## 500 0.8488743 0.5134342
## 550 0.8474796 0.5094925
## 600 0.8480773 0.5110583
## 650 0.8478781 0.5097130
## 700 0.8488743 0.5147859
## 750 0.8492728 0.5148479
## 800 0.8485754 0.5126250
## 850 0.8490735 0.5155813
## 900 0.8487747 0.5127515
## 950 0.8479777 0.5108229
## 1000 0.8469815 0.5086830
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa for the selected mtry.
Adult_TDA_KDE_5.50.5_n4_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8631201 0.5534715 Fold1
## 2 0.8520622 0.5137710 Fold3
## 3 0.8427974 0.4968743 Fold2
# Keep the per-fold accuracies as a one-column data frame; Accuracy is the
# first column of the resample table, so this is the same selection as [1].
ad_tda_kde_5.50.5_n4_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n4_RfFit0$resample["Accuracy"]
# Structural overview of the fitted object (component lengths and classes).
summary(Adult_TDA_KDE_5.50.5_n4_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 10038 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 20076 matrix numeric
## oob.times 10038 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 10038 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the top 25 predictors for the node-4 fit.
# Fixes the title typo ("Assited" -> "Assisted") and names the num_features
# argument instead of passing 25 positionally.
vip(Adult_TDA_KDE_5.50.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n4_RfFit TDA-Assisted RF")

# Score adult.one_hot_df4Test with the node-4 random forest fit.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed adult_df1 labels
# (coerced to a factor) and print the resulting statistics.
ad_tda_kde_5.50.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6975 654
## >50K 441 1698
##
## Accuracy : 0.8879
## 95% CI : (0.8815, 0.8941)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6836
##
## Mcnemar's Test P-Value : 1.488e-10
##
## Sensitivity : 0.9405
## Specificity : 0.7219
## Pos Pred Value : 0.9143
## Neg Pred Value : 0.7938
## Prevalence : 0.7592
## Detection Rate : 0.7141
## Detection Prevalence : 0.7810
## Balanced Accuracy : 0.8312
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed a second time here (same object as the
# print directly after it was created).
ad_tda_kde_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6975 654
## >50K 441 1698
##
## Accuracy : 0.8879
## 95% CI : (0.8815, 0.8941)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6836
##
## Mcnemar's Test P-Value : 1.488e-10
##
## Sensitivity : 0.9405
## Specificity : 0.7219
## Pos Pred Value : 0.9143
## Neg Pred Value : 0.7938
## Prevalence : 0.7592
## Detection Rate : 0.7141
## Detection Prevalence : 0.7810
## Balanced Accuracy : 0.8312
##
## 'Positive' Class : <=50K
##
# Overall statistics as a named numeric vector.
ad_tda_kde_5.50.5_n4_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.878993e-01 6.836098e-01 8.814756e-01 8.940906e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.354808e-228 1.487834e-10
# Keep the test-set accuracy; it is the first element of $overall, selected
# here by name rather than by position.
ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_rf_cf0$overall["Accuracy"]
# Per-class statistics as a named numeric vector.
ad_tda_kde_5.50.5_n4_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9405340 0.7219388 0.9142745
## Neg Pred Value Precision Recall
## 0.7938289 0.9142745 0.9405340
## F1 Prevalence Detection Rate
## 0.9272183 0.7592138 0.7140663
## Detection Prevalence Balanced Accuracy
## 0.7810197 0.8312364
# Keep precision, recall and F1 (elements 5 to 7 of $byClass), selected by
# name rather than by position.
ad_tda_kde_5.50.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_rf_fit_re minus the node-4 per-fold
# accuracies.
# NOTE(review): the subtraction pairs rows by position, and the node-4
# resample table lists its folds as Fold1, Fold3, Fold2 -- confirm that
# ad_rf_fit_re is in the same fold order.
diff_tda_kde_5.50.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n4_rf_fit0_re
diff_tda_kde_5.50.5_rf_n4_3_fold
## Accuracy
## 1 -0.006334570
## 2 0.006453235
## 3 0.015981256
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test above.
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_rf.n4_3_fold$probLeft /
  bst_tda_kde_5.50.5_rf.n4_3_fold$probRight
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.8443
##
## $winRight
## [1] 0.1557
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# for k-fold cross-validation it is commonly set to 1/k, so confirm that 0.1
# is intended for this 3-fold comparison.
bct_tda_kde_5.50.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n4_3_fold
## $left
## [1] 0.08788593
##
## $rope
## [1] 0.6130554
##
## $right
## [1] 0.2990587
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n4_3_fold, c(-0.01, 0.01)))

# The BayesFactor / t-test steps for the 3-fold differences sat in the middle
# of the test-set section, between the "Odds Left" comment and its code. They
# are moved here so the 3-fold analysis is contiguous; the statements are
# independent of the test-set ones, so results are unchanged.
# BayesFactor (disabled)
# bf_tda_kde_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
# bf_tda_kde_5.50.5_rf.n4_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold)
## t = 0.83012, df = 2, p-value = 0.4938
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02244955 0.03318283
## sample estimates:
## mean of x
## 0.00536664
### Test set diff
# Test-set accuracy difference: rf_cf_ov_acc minus
# ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc. The result printed below is a single
# value, so every test in this section runs on one observation.
diff_tda_kde_5.50.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc
diff_tda_kde_5.50.5_rf.n4_test
## Accuracy
## -0.02927928
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n4_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; probRight is 0 here, so the ratio evaluates to Inf.
bst_tda_kde_5.50.5_rf.n4_test_odds.left <-
  bst_tda_kde_5.50.5_rf.n4_test$probLeft /
  bst_tda_kde_5.50.5_rf.n4_test$probRight
bst_tda_kde_5.50.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n4_test
## $winLeft
## [1] 0.8431333
##
## $winRope
## [1] 0.1568667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): all three results are NA, presumably because a standard
# deviation cannot be computed from a single difference -- confirm whether
# this test is meaningful for the test-set comparison.
bct_tda_kde_5.50.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
# plot(rope(diff_tda_kde_5.50.5_rf.n4_test))
# BayesFactor (disabled)
# bf_tda_kde_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n4_test))
# bf_tda_kde_5.50.5_rf.n4_test
# t_test (disabled)
# t.test(as.matrix(diff_tda_kde_5.50.5_rf.n4_test))
##Node5
# Fit the n5 TDA-assisted random forest with caret, tuning over rfGrid under
# the resampling scheme in fitControl and selecting on Accuracy.
# NOTE(review): the rendered fit reports 108 predictors while the tuning table
# lists mtry values from 50 to 1000; randomForest resets out-of-range mtry
# with a warning, so the grid (defined elsewhere) likely needs capping.
Adult_TDA_KDE_5.50.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  # randomForest's argument is lowercase `importance`. The previous
  # `Importance = T` was swallowed by `...` and silently ignored, so
  # permutation importance was never computed (importanceSD stayed NULL).
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  # Spelled out in full rather than relying on partial matching of `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.France,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm,
## V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Print the fitted caret model (resampling results per mtry and the chosen value)
Adult_TDA_KDE_5.50.5_n5_RfFit0
## Random Forest
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5028, 5026, 5026
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8718853 0.4208062
## 100 0.8687033 0.4155007
## 150 0.8696315 0.4182978
## 200 0.8684376 0.4132666
## 250 0.8684380 0.4146251
## 300 0.8675096 0.4093738
## 350 0.8698968 0.4190122
## 400 0.8677754 0.4096011
## 450 0.8689686 0.4181539
## 500 0.8692338 0.4176560
## 550 0.8684385 0.4172817
## 600 0.8688363 0.4166412
## 650 0.8687038 0.4139031
## 700 0.8688358 0.4175411
## 750 0.8689683 0.4146387
## 800 0.8688359 0.4171044
## 850 0.8685704 0.4129635
## 900 0.8685711 0.4155595
## 950 0.8677746 0.4109668
## 1000 0.8687034 0.4145042
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling results of the selected model
Adult_TDA_KDE_5.50.5_n5_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8793790 0.4587947 Fold1
## 2 0.8663484 0.3978005 Fold3
## 3 0.8699284 0.4058232 Fold2
# Keep only the first column (Accuracy) as a one-column data frame.
# NOTE(review): rows print in Fold1, Fold3, Fold2 order; any later row-wise
# comparison with another model's folds assumes the same ordering -- confirm.
ad_tda_kde_5.50.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.50.5_n5_RfFit0[["resample"]][1]
summary(Adult_TDA_KDE_5.50.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 7540 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 15080 matrix numeric
## oob.times 7540 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 7540 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the top 25 predictors for the n5 TDA-assisted RF.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.50.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_RfFit TDA-Assisted RF")

# Predict on the test set with the n5 TDA-assisted random forest
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels
ad_tda_kde_5.50.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6656 743
## >50K 760 1609
##
## Accuracy : 0.8461
## 95% CI : (0.8388, 0.8532)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5802
##
## Mcnemar's Test P-Value : 0.6798
##
## Sensitivity : 0.8975
## Specificity : 0.6841
## Pos Pred Value : 0.8996
## Neg Pred Value : 0.6792
## Prevalence : 0.7592
## Detection Rate : 0.6814
## Detection Prevalence : 0.7575
## Balanced Accuracy : 0.7908
##
## 'Positive' Class : <=50K
##
# NOTE(review): repeats the print of the same confusion matrix shown just above
ad_tda_kde_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6656 743
## >50K 760 1609
##
## Accuracy : 0.8461
## 95% CI : (0.8388, 0.8532)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5802
##
## Mcnemar's Test P-Value : 0.6798
##
## Sensitivity : 0.8975
## Specificity : 0.6841
## Pos Pred Value : 0.8996
## Neg Pred Value : 0.6792
## Prevalence : 0.7592
## Detection Rate : 0.6814
## Detection Prevalence : 0.7575
## Balanced Accuracy : 0.7908
##
## 'Positive' Class : <=50K
##
# Overall statistics of the test-set confusion matrix
ad_tda_kde_5.50.5_n5_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.461302e-01 5.801857e-01 8.388205e-01 8.532329e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.564944e-99 6.798222e-01
# First entry of `overall` is Accuracy (see the printed order)
ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_rf_cf0[["overall"]][1]
# Per-class statistics
ad_tda_kde_5.50.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.8975189 0.6840986 0.8995810
## Neg Pred Value Precision Recall
## 0.6791895 0.8995810 0.8975189
## F1 Prevalence Detection Rate
## 0.8985488 0.7592138 0.6814087
## Detection Prevalence Balanced Accuracy
## 0.7574734 0.7908088
# Entries 5:7 of `byClass` are Precision, Recall and F1 in the printed order
ad_tda_kde_5.50.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_rf_cf0[["byClass"]][5:7]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers
### 3-fold difference: ad_rf_fit_re minus the n5 TDA-assisted per-fold accuracy
# NOTE(review): the subtraction pairs rows positionally, so both operands are
# assumed to list the folds in the same order -- confirm.
diff_tda_kde_5.50.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n5_rf_fit0_re
diff_tda_kde_5.50.5_rf_n5_3_fold
## Accuracy
## 1 -0.02259341
## 2 -0.00783305
## 3 -0.01114978
## Bayesian tests on the 3-fold differences
# Bayesian sign test, called with bounds -0.01 and 0.01
bst_tda_kde_5.50.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the 3-fold Bayesian sign test.
# The ratio is Inf whenever probRight is 0, as in the printed result.
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_rf.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the 3-fold differences, bounds -0.01 and 0.01
bsr_tda_kde_5.50.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.6054
##
## $winRope
## [1] 0.3946
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the 3-fold differences, bounds -0.01 and 0.01.
# NOTE(review): the second argument (0.1) is presumably the correlation used by
# correlatedBayesianTtest -- confirm against that function's definition.
bct_tda_kde_5.50.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_rf.n5_3_fold
## $left
## [1] 0.7336319
##
## $rope
## [1] 0.2444821
##
## $right
## [1] 0.02188601
# ROPE plot for the n5 3-fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_rf_n5_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
#bf_tda_kde_5.50.5_rf.n5_3_fold
# One-sample t-test on the 3-fold differences
t.test(
  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold)
## t = -3.0997, df = 2, p-value = 0.09022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.033096130 0.005378641
## sample estimates:
## mean of x
## -0.01385874
### Test set diff
# Single test-set accuracy difference: rf_cf_ov_acc minus the TDA-assisted RF
# accuracy. A positive value means rf_cf_ov_acc is the larger of the two.
diff_tda_kde_5.50.5_rf.n5_test <-
  (rf_cf_ov_acc - ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n5_test
## Accuracy
## 0.01248976
## Bayesian Tests Test set diff
# NOTE(review): every test below receives one observation only, so the
# reported probabilities rest on a single difference - confirm this is intended.
# Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_rf.n5_test_odds.left <-
  bst_tda_kde_5.50.5_rf.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_rf.n5_test[["probRight"]]
bst_tda_kde_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4605333
##
## $winRight
## [1] 0.5394667
# Bayesian Correlated Test
# Returns NA for all three regions here; presumably because a spread cannot be
# estimated from one difference - TODO confirm against the helper's definition.
bct_tda_kde_5.50.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_rf.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n5_test))
#bf_tda_kde_5.50.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n5_test))
##Non-TDA-Assisted
# Tuning grid for the radial-basis SVM: three sigma values crossed with five
# cost values (C = 0.25, 0.50, 0.75, 1.00, 1.25), i.e. 15 candidates.
svmGrid <- expand.grid(sigma = c(0.1, 1, 10), C = (1:5) * 0.25)
#Support Vector Machine-Radial Basis
# Fit on the full one-hot encoded training set, centring and scaling the
# predictors and selecting the grid point with the highest resampled Accuracy.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is forwarded through `...`; confirm it has any
# effect for method = 'svmRadial'.
# NOTE(review): this call emits repeated zero-variance warnings for
# V14.Holand.Netherlands (see recorded output) - consider whether that column
# should be dropped before scaling.
adultSvmFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid, preProc = c('center','scale'),
  metric='Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Resampled tuning results for the non-TDA SVM.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8050718 0.346413571
## 0.1 0.50 0.8152943 0.408134642
## 0.1 0.75 0.8202519 0.435200975
## 0.1 1.00 0.8212610 0.447051342
## 0.1 1.25 0.8204713 0.450881072
## 1.0 0.25 0.7766419 0.136421752
## 1.0 0.50 0.7860747 0.211872997
## 1.0 0.75 0.7915588 0.256547069
## 1.0 1.00 0.7943228 0.287135922
## 1.0 1.25 0.7967359 0.311243802
## 10.0 0.25 0.7592682 0.000741839
## 10.0 0.50 0.7607160 0.015579351
## 10.0 0.75 0.7630413 0.042435399
## 10.0 1.00 0.7641820 0.068288068
## 10.0 1.25 0.7645768 0.087308496
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Fold-level Accuracy and Kappa of the selected model.
adultSvmFit[["resample"]]
## Accuracy Kappa Resample
## 1 0.8210055 0.4506814 Fold1
## 2 0.8221666 0.4438993 Fold2
## 3 0.8206107 0.4465733 Fold3
# Keep only the first column (Accuracy) as a one-column data frame.
ad_svm_fit_re <- adultSvmFit[["resample"]][1]
summary(adultSvmFit)
## Length Class Mode
## 1 ksvm S4
#vip(adultSvmFit, 25) + ggtitle("non-TDA-Assited Svm")
# Score the held-out test set with the tuned non-TDA SVM.
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the observed test labels.
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
svm_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6949 1215
## >50K 467 1137
##
## Accuracy : 0.8278
## 95% CI : (0.8202, 0.8352)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4717
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9370
## Specificity : 0.4834
## Pos Pred Value : 0.8512
## Neg Pred Value : 0.7089
## Prevalence : 0.7592
## Detection Rate : 0.7114
## Detection Prevalence : 0.8358
## Balanced Accuracy : 0.7102
##
## 'Positive' Class : <=50K
##
# Overall statistics; the first entry (Accuracy) is stored for later use.
svm_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.278051e-01 4.716603e-01 8.201693e-01 8.352449e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.641899e-61 3.989568e-74
svm_cf_ov_acc <- svm_cf[["overall"]][1]
# Per-class statistics; entries 5 to 7 are Precision, Recall and F1.
svm_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9370280 0.4834184 0.8511759
## Neg Pred Value Precision Recall
## 0.7088529 0.8511759 0.9370280
## F1 Prevalence Detection Rate
## 0.8920411 0.7592138 0.7114046
## Detection Prevalence Balanced Accuracy
## 0.8357903 0.7102232
svm_cf_pr_rec_f1 <- svm_cf[["byClass"]][5:7]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Same radial-basis SVM recipe as the non-TDA fit (same grid, resampling
# control, centring/scaling and Accuracy metric), trained on a TDA-derived
# subset instead of the full training set.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): the header and the object name refer to node 1 (n1), but the
# training data is `tda.m_adult_5.50.5.n2.vec` (n2) - confirm the intended node.
# NOTE(review): `Importance` is forwarded through `...`; confirm it has any
# effect for method = 'svmRadial'.
Adult_TDA_PC_5.50.5_n1_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid, preProc = c('center','scale'),
  metric='Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8137, 8137, 8138
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6762249 0.33461539
## 0.1 0.50 0.6908894 0.36962245
## 0.1 0.75 0.6957230 0.38197704
## 0.1 1.00 0.6970338 0.38582889
## 0.1 1.25 0.6956412 0.38396161
## 1.0 0.25 0.6146976 0.17941548
## 1.0 0.50 0.6441913 0.25304665
## 1.0 0.75 0.6551694 0.28371902
## 1.0 1.00 0.6572993 0.29270532
## 1.0 1.25 0.6601666 0.30140139
## 10.0 0.25 0.5572668 0.03234495
## 10.0 0.50 0.5749629 0.07892666
## 10.0 0.75 0.5908569 0.12199304
## 10.0 1.00 0.6009336 0.15083881
## 10.0 1.25 0.6054395 0.16481639
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_PC_5.50.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7041042 0.3994792 Fold1
## 2 0.6950111 0.3822459 Fold2
## 3 0.6919862 0.3757616 Fold3
ad_tda_pc_5.50.5_n1_svm_fit_re<-Adult_TDA_PC_5.50.5_n1_SvmFit0 $resample[1]
summary(Adult_TDA_PC_5.50.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.50.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n1_SvmFit TDA-Assited Svm")
# Score the test frame with the node-1 model fitted above.
# NOTE(review): the recorded test accuracy (0.3557) is below the
# no-information rate (0.7592) and far under the cross-validated accuracy
# (about 0.70); verify that adult.one_hot_df4Test has the same columns and
# encoding as the frame the model was trained on.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1477 355
## >50K 5939 1997
##
## Accuracy : 0.3557
## 95% CI : (0.3462, 0.3652)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0266
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1992
## Specificity : 0.8491
## Pos Pred Value : 0.8062
## Neg Pred Value : 0.2516
## Prevalence : 0.7592
## Detection Rate : 0.1512
## Detection Prevalence : 0.1876
## Balanced Accuracy : 0.5241
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the recorded output is identical to the print issued right after it was built.
ad_tda_pc_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1477 355
## >50K 5939 1997
##
## Accuracy : 0.3557
## 95% CI : (0.3462, 0.3652)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0266
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1992
## Specificity : 0.8491
## Pos Pred Value : 0.8062
## Neg Pred Value : 0.2516
## Prevalence : 0.7592
## Detection Rate : 0.1512
## Detection Prevalence : 0.1876
## Balanced Accuracy : 0.5241
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-1 confusion matrix.
ad_tda_pc_5.50.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.35565111 0.02663693 0.34615081 0.36523754 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep the named test-set accuracy for the test-set comparison.
ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n1_svm_cf0$overall["Accuracy"]
# Per-class statistics; they describe the class the confusion matrix reports
# as 'Positive'.
ad_tda_pc_5.50.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.1991640 0.8490646 0.8062227
## Neg Pred Value Precision Recall
## 0.2516381 0.8062227 0.1991640
## F1 Prevalence Detection Rate
## 0.3194204 0.7592138 0.1512080
## Detection Prevalence Balanced Accuracy
## 0.1875512 0.5241143
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.50.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise accuracy gap: ad_svm_fit_re (presumably the non-TDA SVM's per-fold
# accuracy) minus the node-1 TDA-assisted SVM's per-fold accuracy. Positive
# values mean ad_svm_fit_re scored higher on that row.
diff_tda_pca_5.50.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.50.5_n1_svm_fit_re
diff_tda_pca_5.50.5_svm_n1_3_fold
## Accuracy
## 1 0.1169013
## 2 0.1271556
## 3 0.1286245
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (last two arguments: ROPE bounds -0.01 and 0.01)
bst_tda_pca_5.50.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft relative to probRight
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_3_fold$probLeft /
  bst_tda_pca_5.50.5_svm.n1_3_fold$probRight
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test (same ROPE bounds)
bsr_tda_pca_5.50.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0091
##
## $winRight
## [1] 0.9909
# Bayesian Correlated Test
# (0.1 is presumably the correlation parameter of the test; TODO confirm)
bct_tda_pca_5.50.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n1_3_fold
## $left
## [1] 0.0005023475
##
## $rope
## [1] 0.0001909143
##
## $right
## [1] 0.9993067
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n1_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold
#t_test
# One-sample t-test of the three fold-wise differences against zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold)
## t = 33.69, df = 2, p-value = 0.0008799
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1083617 0.1400925
## sample estimates:
## mean of x
## 0.1242271
### Test set diff
# Single test-set accuracy gap: svm_cf_ov_acc minus the node-1 TDA-assisted
# SVM's test accuracy.
diff_tda_pca_5.50.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n1_test
## Accuracy
## 0.472154
## Bayesian Tests Test set diff
# NOTE(review): every test below receives a 1x1 matrix (one difference). The
# recorded sign test returns 0.5/0.5 and the correlated test returns NA, so
# these results look uninformative; confirm whether a multi-observation input
# was intended.
# Bayesian Sign Test
bst_tda_pca_5.50.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft relative to probRight
bst_tda_pca_5.50.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n1_test$probLeft /
  bst_tda_pca_5.50.5_svm.n1_test$probRight
bst_tda_pca_5.50.5_svm.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1596
##
## $winRight
## [1] 0.8404
# Bayesian Correlated Test
bct_tda_pca_5.50.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n1_test)))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n1_test)) #bf_tda_pca_5.50.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit the node-2 radial-basis SVM through caret: predictors are centred and
# scaled, and the sigma/C pair is picked from svmGrid by resampled accuracy
# under fitControl.
# Changes from the previous form: TRUE replaces the reassignable shorthand T,
# and the preProcess argument is spelled out instead of relying on partial
# matching of `preProc`.
# NOTE(review): `Importance` does not look like a documented svmRadial/ksvm
# argument (presumably carried over from a random-forest template); confirm
# whether it can be dropped.
# NOTE(review): the recorded warnings report zero-variance predictors that
# cannot be scaled; caret's "zv" pre-processing step would drop them. It is
# not added here because it would change the fitted model.
Adult_TDA_PC_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands, V14.Honduras, V14.Thailand, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret tuning summary (accuracy/kappa per sigma-C pair) for the node-2 radial SVM fit.
Adult_TDA_PC_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8137, 8137, 8138
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6778625 0.33756898
## 0.1 0.50 0.6918720 0.37103428
## 0.1 0.75 0.6952312 0.38015865
## 0.1 1.00 0.6962143 0.38371387
## 0.1 1.25 0.6976889 0.38760743
## 1.0 0.25 0.6143694 0.17845054
## 1.0 0.50 0.6498438 0.26512742
## 1.0 0.75 0.6572170 0.28795443
## 1.0 1.00 0.6627063 0.30388347
## 1.0 1.25 0.6648365 0.31051449
## 10.0 0.25 0.5559561 0.02963286
## 10.0 0.50 0.5729967 0.07417054
## 10.0 0.75 0.5874976 0.11367689
## 10.0 1.00 0.6002785 0.14891491
## 10.0 1.25 0.6036371 0.16034175
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy and kappa of the selected node-2 model.
# NOTE(review): the rows are listed as Fold1, Fold3, Fold2 in this run; the
# column extracted below keeps that row order.
Adult_TDA_PC_5.50.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7053330 0.4045356 Fold1
## 2 0.6885447 0.3686743 Fold3
## 3 0.6991890 0.3896124 Fold2
# Keep only the per-fold accuracy, still as a one-column data frame, for the
# fold-wise comparison further down.
ad_tda_pc_5.50.5_n2_svm_fit_re <-
  Adult_TDA_PC_5.50.5_n2_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.50.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n2_SvmFit TDA-Assited Svm")
# Score the test frame with the node-2 model fitted above.
# NOTE(review): the recorded test accuracy (0.3584) is below the
# no-information rate (0.7592) and far under the cross-validated accuracy
# (about 0.70); verify that adult.one_hot_df4Test has the same columns and
# encoding as the frame the model was trained on.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.50.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1496 347
## >50K 5920 2005
##
## Accuracy : 0.3584
## 95% CI : (0.3489, 0.368)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.03
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2017
## Specificity : 0.8525
## Pos Pred Value : 0.8117
## Neg Pred Value : 0.2530
## Prevalence : 0.7592
## Detection Rate : 0.1532
## Detection Prevalence : 0.1887
## Balanced Accuracy : 0.5271
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the recorded output is identical to the print issued right after it was built.
ad_tda_pc_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1496 347
## >50K 5920 2005
##
## Accuracy : 0.3584
## 95% CI : (0.3489, 0.368)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.03
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2017
## Specificity : 0.8525
## Pos Pred Value : 0.8117
## Neg Pred Value : 0.2530
## Prevalence : 0.7592
## Detection Rate : 0.1532
## Detection Prevalence : 0.1887
## Balanced Accuracy : 0.5271
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-2 confusion matrix.
ad_tda_pc_5.50.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.35841523 0.02995696 0.34889776 0.36801720 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep the named test-set accuracy for the test-set comparison.
ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n2_svm_cf0$overall["Accuracy"]
# Per-class statistics; they describe the class the confusion matrix reports
# as 'Positive'.
ad_tda_pc_5.50.5_n2_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2017260 0.8524660 0.8117200
## Neg Pred Value Precision Recall
## 0.2529968 0.8117200 0.2017260
## F1 Prevalence Detection Rate
## 0.3231450 0.7592138 0.1531532
## Detection Prevalence Balanced Accuracy
## 0.1886773 0.5270960
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.50.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise accuracy gap: ad_svm_fit_re (presumably the non-TDA SVM's per-fold
# accuracy) minus the node-2 TDA-assisted SVM's per-fold accuracy. Positive
# values mean ad_svm_fit_re scored higher on that row.
# NOTE(review): the subtraction pairs rows by position, and this model's
# resample table is ordered Fold1, Fold3, Fold2; confirm that ad_svm_fit_re
# uses the same row order if fold-level pairing matters.
diff_tda_pca_5.50.5_svm_n2_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.50.5_n2_svm_fit_re
diff_tda_pca_5.50.5_svm_n2_3_fold
## Accuracy
## 1 0.1156725
## 2 0.1336219
## 3 0.1214217
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (last two arguments: ROPE bounds -0.01 and 0.01)
bst_tda_pca_5.50.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft relative to probRight
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_3_fold$probLeft /
  bst_tda_pca_5.50.5_svm.n2_3_fold$probRight
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test (same ROPE bounds)
bsr_tda_pca_5.50.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009666667
##
## $winRight
## [1] 0.9903333
# Bayesian Correlated Test
# (0.1 is presumably the correlation parameter of the test; TODO confirm)
bct_tda_pca_5.50.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n2_3_fold
## $left
## [1] 0.00104314
##
## $rope
## [1] 0.0003980152
##
## $right
## [1] 0.9985588
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n2_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold
#t_test
# One-sample t-test of the three fold-wise differences against zero.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold)
## t = 23.351, df = 2, p-value = 0.001829
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1008028 0.1463413
## sample estimates:
## mean of x
## 0.123572
### Test set diff
# Single test-set accuracy gap: svm_cf_ov_acc minus the node-2 TDA-assisted
# SVM's test accuracy.
diff_tda_pca_5.50.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n2_test
## Accuracy
## 0.4693898
## Bayesian Tests Test set diff
# NOTE(review): every test below receives a 1x1 matrix (one difference). The
# recorded sign test returns 0.5/0.5 and the correlated test returns NA, so
# these results look uninformative; confirm whether a multi-observation input
# was intended.
# Bayesian Sign Test
bst_tda_pca_5.50.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_svm.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft relative to probRight
bst_tda_pca_5.50.5_svm.n2_test_odds.left <-
  bst_tda_pca_5.50.5_svm.n2_test$probLeft /
  bst_tda_pca_5.50.5_svm.n2_test$probRight
bst_tda_pca_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_svm.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1589667
##
## $winRight
## [1] 0.8410333
# Bayesian Correlated Test
bct_tda_pca_5.50.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n2_test)))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n2_test)) #bf_tda_pca_5.50.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n2_test))
##Node3
# Fit the node-3 TDA-assisted radial-kernel SVM through caret::train(), tuning
# over svmGrid under the resampling scheme in fitControl and selecting on
# accuracy. Predictors are centered and scaled before fitting.
# NOTE(review): `Importance` is not a named train() argument, so it is
# forwarded through `...`; it looks carried over from another model section --
# confirm it has any effect for method = "svmRadial".
# NOTE(review): this fit warns about zero-variance predictors; consider adding
# "zv" to preProcess -- TODO confirm that is acceptable for the comparison.
Adult_TDA_PC_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  Importance = TRUE, # was `T`, which is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the cross-validated tuning summary of the node-3 SVM fit.
Adult_TDA_PC_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7842145 0.1162425046
## 0.1 0.50 0.7935045 0.1905707959
## 0.1 0.75 0.7977341 0.2296776075
## 0.1 1.00 0.7996222 0.2553808768
## 0.1 1.25 0.7993957 0.2673367247
## 1.0 0.25 0.7744714 0.0237078435
## 1.0 0.50 0.7771147 0.0587266562
## 1.0 0.75 0.7795317 0.0906697532
## 1.0 1.00 0.7814952 0.1231345858
## 1.0 1.25 0.7815709 0.1460611257
## 10.0 0.25 0.7713746 -0.0001509876
## 10.0 0.50 0.7716012 0.0038777846
## 10.0 0.75 0.7717523 0.0123107426
## 10.0 1.00 0.7712991 0.0258898059
## 10.0 1.25 0.7693353 0.0297386428
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold performance of the selected node-3 model.
Adult_TDA_PC_5.50.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7996828 0.2554193 Fold1
## 2 0.7978699 0.2563524 Fold2
## 3 0.8013140 0.2543709 Fold3
# Keep only the per-fold accuracy (one-column data frame) for the fold-wise
# comparison against the baseline. The rows of `$resample` are not guaranteed
# to come back in fold order, and the later subtraction is positional, so sort
# by fold label first.
# NOTE(review): assumes ad_svm_fit_re is likewise in Fold1..FoldK order --
# TODO confirm where it is created.
ad_tda_pc_5.50.5_n3_svm_fit_re <- local({
  fold_results <- Adult_TDA_PC_5.50.5_n3_SvmFit0$resample
  fold_results <- fold_results[order(fold_results$Resample), "Accuracy", drop = FALSE]
  rownames(fold_results) <- NULL
  fold_results
})
summary(Adult_TDA_PC_5.50.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.50.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n3_SvmFit TDA-Assited Svm")
# Predict the test set with the node-3 model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels.
ad_tda_pc_5.50.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6741 1787
## >50K 675 565
##
## Accuracy : 0.748
## 95% CI : (0.7392, 0.7565)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9954
##
## Kappa : 0.1779
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9090
## Specificity : 0.2402
## Pos Pred Value : 0.7905
## Neg Pred Value : 0.4556
## Prevalence : 0.7592
## Detection Rate : 0.6901
## Detection Prevalence : 0.8731
## Balanced Accuracy : 0.5746
##
## 'Positive' Class : <=50K
##
# NOTE(review): this repeats the print of the same confusion matrix made right
# after it was created; the output that follows is identical to the one above.
ad_tda_pc_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6741 1787
## >50K 675 565
##
## Accuracy : 0.748
## 95% CI : (0.7392, 0.7565)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9954
##
## Kappa : 0.1779
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9090
## Specificity : 0.2402
## Pos Pred Value : 0.7905
## Neg Pred Value : 0.4556
## Prevalence : 0.7592
## Detection Rate : 0.6901
## Detection Prevalence : 0.8731
## Balanced Accuracy : 0.5746
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the node-3 confusion matrix.
ad_tda_pc_5.50.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.479525e-01 1.779221e-01 7.392178e-01 7.565392e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.953835e-01 4.835896e-111
# Keep the accuracy entry (first element above) for the test-set comparison.
ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_pc_5.50.5_n3_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9089806 0.2402211 0.7904550
## Neg Pred Value Precision Recall
## 0.4556452 0.7904550 0.9089806
## F1 Prevalence Detection Rate
## 0.8455845 0.7592138 0.6901106
## Detection Prevalence Balanced Accuracy
## 0.8730549 0.5746008
# Keep precision, recall and F1 (elements 5 to 7 of the vector above).
ad_tda_pc_5.50.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference: baseline per-fold accuracy minus node-3 per-fold accuracy.
diff_tda_pca_5.50.5_svm_n3_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n3_svm_fit_re
diff_tda_pca_5.50.5_svm_n3_3_fold
## Accuracy
## 1 0.02132277
## 2 0.02429671
## 3 0.01929669
## Bayesian Tests 3-fold diff
# Sign test with bounds -0.01 / 0.01.
bst_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_svm.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Signed-rank test with the same bounds.
bsr_tda_pca_5.50.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0367
##
## $winRight
## [1] 0.9633
# Correlated t-test (second argument 0.1, then the same bounds).
bct_tda_pca_5.50.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_svm.n3_3_fold
## $left
## [1] 0.001398239
##
## $rope
## [1] 0.00866563
##
## $right
## [1] 0.9899361
# ROPE plot of the node-3 per-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n3_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_pca_5.50.5_svm.n3_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
# bf_tda_pca_5.50.5_svm.n3_3_fold
# Frequentist one-sample t-test of the same differences, for reference.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold)
## t = 14.903, df = 2, p-value = 0.004473
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01539126 0.02788619
## sample estimates:
## mean of x
## 0.02163872
### Test set diff
# Baseline SVM test accuracy minus the node-3 TDA-assisted SVM test accuracy.
diff_tda_pca_5.50.5_svm.n3_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n3_test
## Accuracy
## 0.07985258
## Bayesian Tests Test set diff
# NOTE(review): the input below is a single difference (one value printed
# above), and the correlated test at the end of this section prints NA for it
# -- presumably a spread cannot be estimated from one observation; confirm.
# Sign test with bounds -0.01 / 0.01.
bst_tda_pca_5.50.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01, 0.01
)
bst_tda_pca_5.50.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_svm.n3_test_odds.left <- with(
  bst_tda_pca_5.50.5_svm.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_svm.n3_test_odds.left
## [1] 0
# Signed-rank test with the same bounds.
bsr_tda_pca_5.50.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  -0.01, 0.01
)
bsr_tda_pca_5.50.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1608
##
## $winRight
## [1] 0.8392
# Correlated t-test (second argument 0.1, then the same bounds).
bct_tda_pca_5.50.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n3_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n3_test))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n3_test))
#bf_tda_pca_5.50.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n3_test))
##Node4
# Fit the node-4 TDA-assisted radial-kernel SVM through caret::train(), tuning
# over svmGrid under the resampling scheme in fitControl and selecting on
# accuracy. Predictors are centered and scaled before fitting.
# NOTE(review): `Importance` is not a named train() argument, so it is
# forwarded through `...`; it looks carried over from another model section --
# confirm it has any effect for method = "svmRadial".
# NOTE(review): this fit warns about a zero-variance predictor; consider adding
# "zv" to preProcess -- TODO confirm that is acceptable for the comparison.
Adult_TDA_PC_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  Importance = TRUE, # was `T`, which is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the cross-validated tuning summary of the node-4 SVM fit.
Adult_TDA_PC_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11134, 11133, 11133
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9448503 0.009245734
## 0.1 0.50 0.9452695 0.046774504
## 0.1 0.75 0.9456287 0.080597130
## 0.1 1.00 0.9453293 0.100635769
## 0.1 1.25 0.9451497 0.118352999
## 1.0 0.25 0.9449701 0.011550314
## 1.0 0.50 0.9455089 0.045970741
## 1.0 0.75 0.9458084 0.072781270
## 1.0 1.00 0.9455688 0.080406156
## 1.0 1.25 0.9449101 0.089703945
## 10.0 0.25 0.9449102 0.000000000
## 10.0 0.50 0.9449701 0.005877392
## 10.0 0.75 0.9449701 0.013467304
## 10.0 1.00 0.9448503 0.016915957
## 10.0 1.25 0.9444311 0.021494283
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1 and C = 0.75.
# Per-fold performance of the selected node-4 model.
Adult_TDA_PC_5.50.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9453827 0.06628466 Fold1
## 2 0.9457518 0.06762623 Fold3
## 3 0.9462906 0.08443292 Fold2
# Keep only the per-fold accuracy (one-column data frame) for the fold-wise
# comparison against the baseline. As printed above, the rows of `$resample`
# are not in fold order (Fold1, Fold3, Fold2), and the later subtraction is
# positional, so sort by fold label first.
# NOTE(review): assumes ad_svm_fit_re is likewise in Fold1..FoldK order --
# TODO confirm where it is created. Printed results further down that depend
# on this object predate the sort and need a re-knit.
ad_tda_pc_5.50.5_n4_svm_fit_re <- local({
  fold_results <- Adult_TDA_PC_5.50.5_n4_SvmFit0$resample
  fold_results <- fold_results[order(fold_results$Resample), "Accuracy", drop = FALSE]
  rownames(fold_results) <- NULL
  fold_results
})
summary(Adult_TDA_PC_5.50.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.50.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n4_SvmFit TDA-Assited Svm")
# Predict the test set with the node-4 model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels.
ad_tda_pc_5.50.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7411 2318
## >50K 5 34
##
## Accuracy : 0.7622
## 95% CI : (0.7536, 0.7706)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.2504
##
## Kappa : 0.0207
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99933
## Specificity : 0.01446
## Pos Pred Value : 0.76174
## Neg Pred Value : 0.87179
## Prevalence : 0.75921
## Detection Rate : 0.75870
## Detection Prevalence : 0.99601
## Balanced Accuracy : 0.50689
##
## 'Positive' Class : <=50K
##
# NOTE(review): this repeats the print of the same confusion matrix made right
# after it was created; the output that follows is identical to the one above.
ad_tda_pc_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7411 2318
## >50K 5 34
##
## Accuracy : 0.7622
## 95% CI : (0.7536, 0.7706)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.2504
##
## Kappa : 0.0207
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99933
## Specificity : 0.01446
## Pos Pred Value : 0.76174
## Neg Pred Value : 0.87179
## Prevalence : 0.75921
## Detection Rate : 0.75870
## Detection Prevalence : 0.99601
## Balanced Accuracy : 0.50689
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the node-4 confusion matrix.
ad_tda_pc_5.50.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76218264 0.02074795 0.75361100 0.77059767 0.75921376
## AccuracyPValue McnemarPValue
## 0.25037066 0.00000000
# Keep the accuracy entry (first element above) for the test-set comparison.
ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_pc_5.50.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99932578 0.01445578 0.76174324
## Neg Pred Value Precision Recall
## 0.87179487 0.76174324 0.99932578
## F1 Prevalence Detection Rate
## 0.86450860 0.75921376 0.75870188
## Detection Prevalence Balanced Accuracy
## 0.99600737 0.50689078
# Keep precision, recall and F1 (elements 5 to 7 of the vector above).
ad_tda_pc_5.50.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference: baseline per-fold accuracy minus node-4 per-fold accuracy.
diff_tda_pca_5.50.5_svm_n4_3_fold <- ad_svm_fit_re - ad_tda_pc_5.50.5_n4_svm_fit_re
diff_tda_pca_5.50.5_svm_n4_3_fold
## Accuracy
## 1 -0.1243772
## 2 -0.1235851
## 3 -0.1256800
## Bayesian Tests 3-fold diff
# Sign test with bounds -0.01 / 0.01.
bst_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Ratio of the sign test's left probability to its right probability
# (Inf here because the right probability is 0).
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_svm.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left
## [1] Inf
# Signed-rank test with the same bounds.
bsr_tda_pca_5.50.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0.9907
##
## $winRope
## [1] 0.0093
##
## $winRight
## [1] 0
# Correlated t-test (second argument 0.1, then the same bounds).
bct_tda_pca_5.50.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_svm.n4_3_fold
## $left
## [1] 0.9999811
##
## $rope
## [1] 5.214165e-06
##
## $right
## [1] 1.373363e-05
# ROPE plot of the node-4 per-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_svm_n4_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_pca_5.50.5_svm.n4_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
# bf_tda_pca_5.50.5_svm.n4_3_fold
# Frequentist one-sample t-test of the same differences, for reference.
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold)
## t = -203.94, df = 2, p-value = 2.404e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1271750 -0.1219198
## sample estimates:
## mean of x
## -0.1245474
### Test set diff
# Baseline SVM test accuracy minus the node-4 TDA-assisted SVM test accuracy.
diff_tda_pca_5.50.5_svm.n4_test <- svm_cf_ov_acc - ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n4_test
## Accuracy
## 0.06562244
## Bayesian Tests Test set diff
# NOTE(review): the input below is a single difference (one value printed
# above), and the correlated test at the end of this section prints NA for it
# -- presumably a spread cannot be estimated from one observation; confirm.
# Sign test with bounds -0.01 / 0.01.
bst_tda_pca_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01, 0.01
)
bst_tda_pca_5.50.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability.
bst_tda_pca_5.50.5_svm.n4_test_odds.left <- with(
  bst_tda_pca_5.50.5_svm.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_svm.n4_test_odds.left
## [1] 0
# Signed-rank test with the same bounds.
bsr_tda_pca_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  -0.01, 0.01
)
bsr_tda_pca_5.50.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1563333
##
## $winRight
## [1] 0.8436667
# Correlated t-test (second argument 0.1, then the same bounds).
bct_tda_pca_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n4_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n4_test))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n4_test))
#bf_tda_pca_5.50.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n4_test))
##Node5
# Fit the TDA-assisted (PCA filter, node 5) radial-kernel SVM with caret,
# tuning over `svmGrid` under the resampling settings in `fitControl` and
# selecting the final model by accuracy. Predictors are centred and scaled.
# NOTE(review): the warnings recorded below report zero-variance predictors in
# this node's subset; consider adding "zv" to `preProcess` -- confirm.
Adult_TDA_PC_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  # `TRUE` instead of `T`: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` looks like a pass-through argument; confirm the
  # svmRadial backend actually uses it.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # Full argument name instead of the partially matched `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9603, 9602, 9603
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9979867 0
## 0.1 0.50 0.9979867 0
## 0.1 0.75 0.9979867 0
## 0.1 1.00 0.9979867 0
## 0.1 1.25 0.9979867 0
## 1.0 0.25 0.9979867 0
## 1.0 0.50 0.9979867 0
## 1.0 0.75 0.9979867 0
## 1.0 1.00 0.9979867 0
## 1.0 1.25 0.9979867 0
## 10.0 0.25 0.9979867 0
## 10.0 0.50 0.9979867 0
## 10.0 0.75 0.9979867 0
## 10.0 1.00 0.9979867 0
## 10.0 1.25 0.9979867 0
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
Adult_TDA_PC_5.50.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9981254 0 Fold1
## 2 0.9979175 0 Fold2
## 3 0.9979171 0 Fold3
# Store the per-fold accuracies (the "Accuracy" column of $resample, kept as a
# one-column data frame) for the fold-level comparison further down.
ad_tda_pc_5.50.5_n5_svm_fit_re <-
  Adult_TDA_PC_5.50.5_n5_SvmFit0$resample["Accuracy"]
# Print the summary of the fitted object.
summary(Adult_TDA_PC_5.50.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.50.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.50.5_n5_SvmFit TDA-Assisted Svm")
# Score the test data set with the node-5 PCA-filter SVM.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels, then print
# the confusion matrix together with its summary statistics.
ad_tda_pc_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Overall test-set accuracy (first element of $overall), kept as a named
# scalar for the test-set comparison further down.
ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1 (elements 5 to 7 of $byClass), selected by name.
ad_tda_pc_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference in per-fold accuracy: `ad_svm_fit_re` minus the node-5
# TDA-assisted SVM, so negative values mean the TDA-assisted fit scored higher
# on that fold.
diff_tda_pca_5.50.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.50.5_n5_svm_fit_re
diff_tda_pca_5.50.5_svm_n5_3_fold
## Accuracy
## 1 -0.1771199
## 2 -0.1757509
## 3 -0.1773064
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_svm.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability. In the
# recorded run probRight is 0, which is why the value printed below is Inf.
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_svm.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0.9903667
##
## $winRope
## [1] 0.009633333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_svm.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n5_3_fold
## $left
## [1] 0.9999942
##
## $rope
## [1] 1.169259e-06
##
## $right
## [1] 4.597945e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold)
## t = -360.38, df = 2, p-value = 7.7e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1788357 -0.1746158
## sample estimates:
## mean of x
## -0.1767257
### Test set diff
# Single test-set accuracy difference: `svm_cf_ov_acc` minus the node-5
# TDA-assisted SVM accuracy; a positive value means the TDA-assisted model
# scored lower on the test data.
diff_tda_pca_5.50.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.50.5_svm.n5_test
## Accuracy
## 0.06859132
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_svm.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_svm.n5_test_odds.left<-bst_tda_pca_5.50.5_svm.n5_test$probLeft/bst_tda_pca_5.50.5_svm.n5_test$probRight
bst_tda_pca_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_svm.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1572
##
## $winRight
## [1] 0.8428
# Bayesian Correlated Test
# The input here is a single test-set difference, and the recorded result is
# NA for left, rope and right.
# NOTE(review): presumably the test needs more than one observation to
# estimate a variance -- confirm against correlatedBayesianTtest.
bct_tda_pca_5.50.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_svm.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n5_test)) #bf_tda_pca_5.50.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit the TDA-assisted (KDE filter, node 1) radial-kernel SVM with caret,
# tuning over `svmGrid` under the resampling settings in `fitControl` and
# selecting the final model by accuracy. Predictors are centred and scaled.
# NOTE(review): the warnings recorded below report zero-variance predictors in
# this node's subset; consider adding "zv" to `preProcess` -- confirm.
Adult_TDA_KDE_5.50.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  # `TRUE` instead of `T`: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` looks like a pass-through argument; confirm the
  # svmRadial backend actually uses it.
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # Full argument name instead of the partially matched `preProc`.
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands,
## V14.Yugoslavia
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8925, 8924, 8925
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8009261 0.35791608
## 0.1 0.50 0.8075741 0.40507324
## 0.1 0.75 0.8117572 0.43067412
## 0.1 1.00 0.8131019 0.44455011
## 0.1 1.25 0.8131766 0.45046802
## 1.0 0.25 0.7614847 0.12988378
## 1.0 0.50 0.7723909 0.19892292
## 1.0 0.75 0.7803838 0.24996496
## 1.0 1.00 0.7834465 0.28031014
## 1.0 1.25 0.7850900 0.30099110
## 10.0 0.25 0.7407933 0.00000000
## 10.0 0.50 0.7423619 0.01112966
## 10.0 0.75 0.7457233 0.03639418
## 10.0 1.00 0.7494582 0.06782569
## 10.0 1.25 0.7516991 0.08859092
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8081578 0.4391419 Fold1
## 2 0.8133124 0.4462817 Fold3
## 3 0.8180596 0.4659804 Fold2
# Keep the per-fold cross-validation accuracies of the KDE node-1 model (first
# column of $resample, as a one-column data frame).
# NOTE(review): the $resample table printed above lists the folds in the order
# Fold1, Fold3, Fold2, so a row-wise comparison with another model's resamples
# may pair different folds -- confirm the intended alignment.
ad_tda_kde_5.50.5_n1_svm_fit_re <- Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample[1]
# Summarise the KDE node-1 fit. The original summarised
# Adult_TDA_PC_5.50.5_n1_SvmFit0, the PCA-filter model from an earlier section.
summary(Adult_TDA_KDE_5.50.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.50.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.50.5_n1_SvmFit TDA-Assisted Svm")
# Score the test data set with the node-1 KDE-filter SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels, then print
# the confusion matrix together with its summary statistics.
ad_tda_kde_5.50.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6974 907
## >50K 442 1445
##
## Accuracy : 0.8619
## 95% CI : (0.8549, 0.8687)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5949
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9404
## Specificity : 0.6144
## Pos Pred Value : 0.8849
## Neg Pred Value : 0.7658
## Prevalence : 0.7592
## Detection Rate : 0.7140
## Detection Prevalence : 0.8068
## Balanced Accuracy : 0.7774
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6974 907
## >50K 442 1445
##
## Accuracy : 0.8619
## 95% CI : (0.8549, 0.8687)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5949
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9404
## Specificity : 0.6144
## Pos Pred Value : 0.8849
## Neg Pred Value : 0.7658
## Prevalence : 0.7592
## Detection Rate : 0.7140
## Detection Prevalence : 0.8068
## Balanced Accuracy : 0.7774
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.618960e-01 5.949281e-01 8.548950e-01 8.686804e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.765119e-140 1.385980e-36
# Overall test-set accuracy (first element of $overall), kept as a named
# scalar for the test-set comparison further down.
ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9403991 0.6143707 0.8849131
## Neg Pred Value Precision Recall
## 0.7657658 0.8849131 0.9403991
## F1 Prevalence Detection Rate
## 0.9118128 0.7592138 0.7139640
## Detection Prevalence Balanced Accuracy
## 0.8068182 0.7773849
# Precision, recall and F1 (elements 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.50.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference in per-fold accuracy: `ad_svm_fit_re` minus the KDE
# node-1 TDA-assisted SVM. The original subtracted from `ad_rf_fit_re`
# (presumably the random-forest resamples), whereas the parallel PCA-filter
# SVM sections use `ad_svm_fit_re`.
# NOTE: the output recorded below and the 3-fold test results that follow were
# produced with the original baseline and must be regenerated.
diff_tda_kde_5.50.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.50.5_n1_svm_fit_re
diff_tda_kde_5.50.5_svm_n1_3_fold
## Accuracy
## 1 0.04862780
## 2 0.04520298
## 3 0.04071902
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n1_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n1_3_fold$probRight
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008966667
##
## $winRight
## [1] 0.9910333
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n1_3_fold
## $left
## [1] 0.001157901
##
## $rope
## [1] 0.001695744
##
## $right
## [1] 0.9971464
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
#bf_tda_kde_5.50.5_svm.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold)
## t = 19.586, df = 2, p-value = 0.002597
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.03499738 0.05470249
## sample estimates:
## mean of x
## 0.04484993
### Test set diff
# Single test-set accuracy difference: `svm_cf_ov_acc` minus the KDE node-1
# TDA-assisted SVM accuracy. The original used `rf_cf_ov_acc` (presumably the
# random-forest test accuracy), whereas the parallel PCA-filter SVM sections
# subtract from `svm_cf_ov_acc`.
# NOTE: the output recorded below and the test-set results that follow were
# produced with the original baseline and must be regenerated.
diff_tda_kde_5.50.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n1_test
## Accuracy
## -0.003276003
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability. In the
# recorded run both probabilities are 0, which is why the value printed below
# is NaN (0 / 0).
bst_tda_kde_5.50.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n1_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n1_test)) #bf_tda_kde_5.50.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a radial-kernel SVM ('svmRadial') on the node-2 training subset,
# tuning over svmGrid with the resampling scheme held in fitControl and
# selecting the model by Accuracy. Predictors are centered and scaled.
#
# Changes from the earlier form: `T` is spelled `TRUE` (T is an ordinary,
# reassignable variable) and the pre-processing argument is given its full
# name `preProcess` instead of relying on partial matching of `preProc`.
#
# NOTE(review): `Importance` is forwarded through `...`; it is unclear
# whether the svmRadial back end uses it -- confirm or drop.
# NOTE(review): the "zero variances" warnings recorded below come from
# constant dummy columns being centered/scaled. Adding "zv" to preProcess
# would drop them, but that changes the fitted model, so it is not done here.
Adult_TDA_KDE_5.50.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary of the node-2 SVM fit.
Adult_TDA_KDE_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8426, 8425, 8425
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7921346 0.38885952
## 0.1 0.50 0.8017089 0.43780314
## 0.1 0.75 0.8059028 0.46187097
## 0.1 1.00 0.8082766 0.47566168
## 0.1 1.25 0.8081184 0.47980336
## 1.0 0.25 0.7486153 0.15700715
## 1.0 0.50 0.7627788 0.23937027
## 1.0 0.75 0.7718780 0.29084582
## 1.0 1.00 0.7765463 0.32252259
## 1.0 1.25 0.7785243 0.34292475
## 10.0 0.25 0.7208419 0.00000000
## 10.0 0.50 0.7249565 0.02425912
## 10.0 0.75 0.7285171 0.05252581
## 10.0 1.00 0.7314447 0.08145614
## 10.0 1.25 0.7342932 0.10447355
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold metrics of the selected model.
Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8081671 0.4791654 Fold1
## 2 0.8075006 0.4720081 Fold2
## 3 0.8091621 0.4758116 Fold3
# Keep the per-fold Accuracy as a one-column data frame. The column is
# selected by name rather than by position so the extraction does not
# silently pick another metric if the column order ever changes.
ad_tda_kde_5.50.5_n2_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.50.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.50.5_n2_SvmFit TDA-Assited Svm")
# Predict the test-set outcome with the node-2 SVM fitted on training data.
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_SvmFit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of the predictions against the observed test labels.
# (The matrix was previously printed twice in a row; it is printed once now.)
ad_tda_kde_5.50.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7038 1006
## >50K 378 1346
##
## Accuracy : 0.8583
## 95% CI : (0.8512, 0.8652)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5736
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9490
## Specificity : 0.5723
## Pos Pred Value : 0.8749
## Neg Pred Value : 0.7807
## Prevalence : 0.7592
## Detection Rate : 0.7205
## Detection Prevalence : 0.8235
## Balanced Accuracy : 0.7607
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.583129e-01 5.735987e-01 8.512392e-01 8.651721e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.550417e-130 9.827767e-64
# Overall test accuracy, picked by name (it is the first element of $overall).
ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.50.5_n2_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9490291 0.5722789 0.8749378
## Neg Pred Value Precision Recall
## 0.7807425 0.8749378 0.9490291
## F1 Prevalence Detection Rate
## 0.9104787 0.7592138 0.7205160
## Detection Prevalence Balanced Accuracy
## 0.8235053 0.7606540
# Precision, Recall and F1 (elements 5 to 7 of $byClass), selected by name.
ad_tda_kde_5.50.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_rf_fit_re minus the node-2 SVM fold accuracies.
diff_tda_kde_5.50.5_svm_n2_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n2_svm_fit_re
diff_tda_kde_5.50.5_svm_n2_3_fold
## Accuracy
## 1 0.04861843
## 2 0.05101481
## 3 0.04961651
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight.
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_svm.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008466667
##
## $winRight
## [1] 0.9915333
# Bayesian Correlated Test
# NOTE(review): the 0.1 is presumably the between-fold correlation term; for
# 3-fold CV the usual heuristic is 1/3 -- confirm against the helper's
# definition before relying on these posteriors.
bct_tda_kde_5.50.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n2_3_fold
## $left
## [1] 9.017033e-05
##
## $rope
## [1] 0.0001134954
##
## $right
## [1] 0.9997963
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n2_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
#bf_tda_kde_5.50.5_svm.n2_3_fold
#t_test
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold)
## t = 71.584, df = 2, p-value = 0.0001951
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.04675965 0.05274018
## sample estimates:
## mean of x
## 0.04974992
### Test set diff
# Held-out accuracy gap: rf_cf_ov_acc minus the node-2 SVM test accuracy.
diff_tda_kde_5.50.5_svm.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n2_test
## Accuracy
## 0.0003071253
## Bayesian Tests Test set diff
# NOTE(review): the difference above is a single number, so every test in
# this section runs on one observation; interpret the results accordingly.
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft and probRight are both 0 here, so the ratio is 0/0 and prints NaN.
bst_tda_kde_5.50.5_svm.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_svm.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.50.5_svm.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Returns NA for all three components on this single-value input (see output).
bct_tda_kde_5.50.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n2_test)) #bf_tda_kde_5.50.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n2_test))
##Node3
# Fit a radial-kernel SVM ('svmRadial') on the node-3 training subset,
# tuning over svmGrid with the resampling scheme held in fitControl and
# selecting the model by Accuracy. Predictors are centered and scaled.
#
# Changes from the earlier form: `T` is spelled `TRUE` (T is an ordinary,
# reassignable variable) and the pre-processing argument is given its full
# name `preProcess` instead of relying on partial matching of `preProc`.
#
# NOTE(review): `Importance` is forwarded through `...`; it is unclear
# whether the svmRadial back end uses it -- confirm or drop.
# NOTE(review): the "zero variances" warnings recorded below come from
# constant dummy columns being centered/scaled. Adding "zv" to preProcess
# would drop them, but that changes the fitted model, so it is not done here.
Adult_TDA_KDE_5.50.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7755, 7756, 7757
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7885505 0.379228039
## 0.1 0.50 0.7985215 0.429985308
## 0.1 0.75 0.8013581 0.448513619
## 0.1 1.00 0.8013583 0.455557182
## 0.1 1.25 0.8022177 0.462238954
## 1.0 0.25 0.7519339 0.173527029
## 1.0 0.50 0.7653426 0.251618382
## 1.0 0.75 0.7716172 0.293702811
## 1.0 1.00 0.7735083 0.316325258
## 1.0 1.25 0.7746258 0.333441663
## 10.0 0.25 0.7243425 0.001801901
## 10.0 0.50 0.7270928 0.025341098
## 10.0 0.75 0.7306169 0.057745494
## 10.0 1.00 0.7367196 0.100794855
## 10.0 1.25 0.7385245 0.122084010
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7994328 0.4527386 Fold1
## 2 0.8006190 0.4583335 Fold3
## 3 0.8066013 0.4756447 Fold2
# Keep only the per-fold Accuracy column of the resampling table
# (single-bracket selection, so the result stays a one-column data frame).
ad_tda_kde_5.50.5_n3_svm_fit_re <- Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot, currently disabled:
# vip(Adult_TDA_KDE_5.50.5_n3_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.50.5_n3_SvmFit TDA-Assisted Svm")
# Score the test data with the node-3 SVM fit.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels of the test data.
ad_tda_kde_5.50.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7037 1081
## >50K 379 1271
##
## Accuracy : 0.8505
## 95% CI : (0.8433, 0.8575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5448
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9489
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7703
## Prevalence : 0.7592
## Detection Rate : 0.7204
## Detection Prevalence : 0.8311
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7037 1081
## >50K 379 1271
##
## Accuracy : 0.8505
## 95% CI : (0.8433, 0.8575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5448
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9489
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7703
## Prevalence : 0.7592
## Detection Rate : 0.7204
## Detection Prevalence : 0.8311
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-3 confusion matrix.
ad_tda_kde_5.50.5_n3_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.505324e-01 5.448034e-01 8.433060e-01 8.575490e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 8.720880e-110 3.552727e-75
# Named length-one vector holding the test-set accuracy (first entry above).
ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.50.5_n3_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9488943 0.5403912 0.8668391
## Neg Pred Value Precision Recall
## 0.7703030 0.8668391 0.9488943
## F1 Prevalence Detection Rate
## 0.9060126 0.7592138 0.7204136
## Detection Prevalence Balanced Accuracy
## 0.8310811 0.7446427
# Entries 5 to 7 of byClass, selected by name: Precision, Recall and F1.
ad_tda_kde_5.50.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_rf_fit_re minus the node-3 TDA-assisted SVM folds.
# NOTE(review): the subtraction pairs rows by position, and the SVM resample
# table printed earlier lists its rows as Fold1, Fold3, Fold2 -- confirm both
# tables are in the same fold order before reading this as a paired difference.
diff_tda_kde_5.50.5_svm_n3_3_fold <- ad_rf_fit_re - ad_tda_kde_5.50.5_n3_svm_fit_re
diff_tda_kde_5.50.5_svm_n3_3_fold
## Accuracy
## 1 0.05735273
## 2 0.05789636
## 3 0.05217729
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the rope() call
# further down uses the same interval) -- confirm against the helper definition.
bst_tda_kde_5.50.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability reported above.
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n3_3_fold[["probLeft"]] / bst_tda_kde_5.50.5_svm.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009
##
## $winRight
## [1] 0.991
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation; with 3-fold resampling a value of 1/3 may have been intended --
# confirm against the helper's definition.
bct_tda_kde_5.50.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n3_3_fold
## $left
## [1] 0.0005105301
##
## $rope
## [1] 0.0005413951
##
## $right
## [1] 0.9989481
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n3_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
#bf_tda_kde_5.50.5_svm.n3_3_fold
#t_test
# One-sample t-test of the three fold differences (default null mean of 0).
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold)
## t = 30.622, df = 2, p-value = 0.001065
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.04796711 0.06365048
## sample estimates:
## mean of x
## 0.05580879
### Test set diff
# Single test-set accuracy gap: rf_cf_ov_acc minus the node-3 SVM test accuracy.
diff_tda_kde_5.50.5_svm.n3_test <- rf_cf_ov_acc - ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n3_test
## Accuracy
## 0.008087633
## Bayesian Tests Test set diff
# Bayesian Sign Test
# The input here is a single difference, not a vector of fold differences.
bst_tda_kde_5.50.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Both probabilities above are 0, so this ratio is 0 / 0 (the NaN recorded below).
bst_tda_kde_5.50.5_svm.n3_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n3_test[["probLeft"]] / bst_tda_kde_5.50.5_svm.n3_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n3_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): every component comes back NA below -- presumably because a
# single observation gives no variance estimate; confirm whether this test
# should be run on the test-set difference at all.
bct_tda_kde_5.50.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n3_test)) #bf_tda_kde_5.50.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n3_test))
## Node4
# Fit a radial-basis SVM through caret::train() on the node-4 data object,
# tuning over svmGrid with the resampling scheme held in fitControl and
# selecting the final model by Accuracy. Predictors are centered and scaled.
# NOTE(review): `Importance` is not a formal argument of train(); it is passed
# on through `...` -- confirm it has any effect for method = 'svmRadial'.
# NOTE(review): the fit emits one zero-variance warning per resample; adding
# 'zv' ahead of 'center' in preProcess may silence them, but would change the
# predictor set -- evaluate before adopting.
Adult_TDA_KDE_5.50.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c('center', 'scale'), # spelled out; `preProc` relied on partial matching
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6691, 6692, 6693
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8237695 0.31328546
## 0.1 0.50 0.8328351 0.39162873
## 0.1 0.75 0.8355246 0.41869496
## 0.1 1.00 0.8376168 0.43522971
## 0.1 1.25 0.8357241 0.43694640
## 1.0 0.25 0.8032474 0.12347358
## 1.0 0.50 0.8109181 0.20267938
## 1.0 0.75 0.8136083 0.24582751
## 1.0 1.00 0.8165965 0.28184130
## 1.0 1.25 0.8160989 0.29695022
## 10.0 0.25 0.7908946 0.00000000
## 10.0 0.50 0.7915919 0.01127624
## 10.0 0.75 0.7932856 0.03911334
## 10.0 1.00 0.7939830 0.07201532
## 10.0 1.25 0.7974699 0.10670093
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8389603 0.4405910 Fold1
## 2 0.8374178 0.4315942 Fold2
## 3 0.8364723 0.4335039 Fold3
# Keep only the per-fold Accuracy column of the resampling table
# (single-bracket selection, so the result stays a one-column data frame).
ad_tda_kde_5.50.5_n4_svm_fit_re <- Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.50.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot, currently disabled:
# vip(Adult_TDA_KDE_5.50.5_n4_SvmFit0, 25) + ggtitle("Adult_TDA_KDE_5.50.5_n4_SvmFit TDA-Assisted Svm")
# Score the test data with the node-4 SVM fit.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels of the test data.
ad_tda_kde_5.50.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7140 1509
## >50K 276 843
##
## Accuracy : 0.8173
## 95% CI : (0.8095, 0.8249)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3912
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9628
## Specificity : 0.3584
## Pos Pred Value : 0.8255
## Neg Pred Value : 0.7534
## Prevalence : 0.7592
## Detection Rate : 0.7310
## Detection Prevalence : 0.8854
## Balanced Accuracy : 0.6606
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7140 1509
## >50K 276 843
##
## Accuracy : 0.8173
## 95% CI : (0.8095, 0.8249)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3912
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9628
## Specificity : 0.3584
## Pos Pred Value : 0.8255
## Neg Pred Value : 0.7534
## Prevalence : 0.7592
## Detection Rate : 0.7310
## Detection Prevalence : 0.8854
## Balanced Accuracy : 0.6606
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-4 confusion matrix.
ad_tda_kde_5.50.5_n4_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.172604e-01 3.912255e-01 8.094513e-01 8.248799e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.148711e-44 6.189516e-187
# Named length-one vector holding the test-set accuracy (first entry above).
ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.50.5_n4_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9627832 0.3584184 0.8255290
## Neg Pred Value Precision Recall
## 0.7533512 0.8255290 0.9627832
## F1 Prevalence Detection Rate
## 0.8888889 0.7592138 0.7309582
## Detection Prevalence Balanced Accuracy
## 0.8854423 0.6606008
# Entries 5 to 7 of byClass, selected by name: Precision, Recall and F1.
ad_tda_kde_5.50.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
diff_tda_kde_5.50.5_svm_n4_3_fold<-(ad_rf_fit_re - ad_tda_kde_5.50.5_n4_svm_fit_re)
diff_tda_kde_5.50.5_svm_n4_3_fold
## Accuracy
## 1 0.01782531
## 2 0.02109759
## 3 0.02230628
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n4_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n4_3_fold$probRight
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0383
##
## $winRight
## [1] 0.9617
# Bayesian Correlated Test
bct_tda_kde_5.50.5_svm.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n4_3_fold
## $left
## [1] 0.001286568
##
## $rope
## [1] 0.009383813
##
## $right
## [1] 0.9893296
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
#bf_tda_kde_5.50.5_svm.n4_3_fold
#t_test
# Frequentist cross-check: one-sample t-test of the three per-fold accuracy
# differences against a mean of 0. The call is kept verbatim because the
# printed "data:" label echoes the argument expression.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold)
## t = 15.248, df = 2, p-value = 0.004273
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01465068 0.02616877
## sample estimates:
## mean of x
## 0.02040973
### Test set diff
# Hold-out accuracy gap: `rf_cf_ov_acc` (defined earlier) minus the node-4
# TDA-assisted SVM test accuracy. This is a single number, so every test
# below runs on one observation.
diff_tda_kde_5.50.5_svm.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n4_test
## Accuracy
## 0.04135954
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.50.5_svm.n4_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n4_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1565
##
## $winRight
## [1] 0.8435
# Bayesian Correlated Test
# NOTE(review): the result printed below is NA for all three regions,
# presumably because a spread cannot be estimated from one observation.
bct_tda_kde_5.50.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n4_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n4_test)) #bf_tda_kde_5.50.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n4_test))
##Node5
# Fit the radial-kernel SVM for this node: the response is `adult_df1` (as a
# factor), all other columns are predictors, inputs are centred and scaled,
# and the sigma/C pair is chosen from `svmGrid` by resampled accuracy under
# `fitControl`.
# NOTE(review): this section is headed "Node5" and the result is stored under
# an `_n5_` name, yet the training data is `tda.m_kde_adult_5.50.5.n3.vec`;
# confirm the node-3 vector is intended and not a copy-paste leftover.
# NOTE(review): `Importance` is not a named train() argument and is only
# forwarded through `...`; confirm it has any effect for this method.
# NOTE(review): the warnings echoed below report zero-variance dummy columns;
# consider whether they should be dropped before centring and scaling.
Adult_TDA_KDE_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary of the fit: resampled accuracy and kappa for each
# sigma/C pair, followed by the selected pair.
Adult_TDA_KDE_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7756, 7756, 7756
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7869177 0.372106305
## 0.1 0.50 0.7983497 0.430093175
## 0.1 0.75 0.8017019 0.449378082
## 0.1 1.00 0.8019598 0.457247054
## 0.1 1.25 0.8019598 0.461313260
## 1.0 0.25 0.7520199 0.171972569
## 1.0 0.50 0.7623345 0.242273554
## 1.0 0.75 0.7682654 0.284635308
## 1.0 1.00 0.7719615 0.313133216
## 1.0 1.25 0.7725632 0.327961061
## 10.0 0.25 0.7243424 0.002359636
## 10.0 0.50 0.7263194 0.021100751
## 10.0 0.75 0.7298436 0.050548989
## 10.0 1.00 0.7344851 0.087034761
## 10.0 1.25 0.7362902 0.107966831
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.50.5_n5_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7960289 0.4447181 Fold1
## 2 0.8073749 0.4811562 Fold3
## 3 0.8024755 0.4580655 Fold2
# Keep only the Accuracy column (still a one-column data frame).
# NOTE(review): rows are listed as Fold1, Fold3, Fold2; anything that pairs
# this table with another one by row position should check the fold order.
ad_tda_kde_5.50.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_SvmFit0[["resample"]]["Accuracy"]
# Summarise the fit.
summary(Adult_TDA_KDE_5.50.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.50.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.50.5_n5_SvmFit TDA-Assited Svm")
# Score the held-out test rows with the fitted SVM.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels.
ad_tda_kde_5.50.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7037 1081
## >50K 379 1271
##
## Accuracy : 0.8505
## 95% CI : (0.8433, 0.8575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5448
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9489
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7703
## Prevalence : 0.7592
## Detection Rate : 0.7204
## Detection Prevalence : 0.8311
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time; the
# output below is identical to the print directly above and looks redundant.
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7037 1081
## >50K 379 1271
##
## Accuracy : 0.8505
## 95% CI : (0.8433, 0.8575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5448
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9489
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7703
## Prevalence : 0.7592
## Detection Rate : 0.7204
## Detection Prevalence : 0.8311
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
# Show the overall statistics of the node-5 confusion matrix.
ad_tda_kde_5.50.5_n5_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.505324e-01 5.448034e-01 8.433060e-01 8.575490e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 8.720880e-110 3.552727e-75
# Keep the overall test-set accuracy, picked by name (first entry).
ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_svm_cf0[["overall"]]["Accuracy"]
# Show every per-class statistic.
ad_tda_kde_5.50.5_n5_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9488943 0.5403912 0.8668391
## Neg Pred Value Precision Recall
## 0.7703030 0.8668391 0.9488943
## F1 Prevalence Detection Rate
## 0.9060126 0.7592138 0.7204136
## Detection Prevalence Balanced Accuracy
## 0.8310811 0.7446427
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`), by name.
ad_tda_kde_5.50.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy gap: `ad_rf_fit_re` (defined earlier) minus the node-5
# TDA-assisted SVM resample accuracies.
# NOTE(review): the two tables are paired by row position, not by fold label,
# and the SVM resample table lists its folds as Fold1, Fold3, Fold2; confirm
# the RF table uses the same order.
diff_tda_kde_5.50.5_svm_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.50.5_n5_svm_fit_re
diff_tda_kde_5.50.5_svm_n5_3_fold
## Accuracy
## 1 0.06075669
## 2 0.05114046
## 3 0.05630312
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# -0.01 and 0.01 are presumably the ROPE bounds (the same interval is handed
# to rope() further down).
bst_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability.
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0088
##
## $winRight
## [1] 0.9912
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) looks like the between-fold
# correlation; the resampling used here is 3-fold, so confirm 0.1 is intended.
bct_tda_kde_5.50.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_3_fold
## $left
## [1] 0.001174964
##
## $rope
## [1] 0.001232736
##
## $right
## [1] 0.9975923
# Rope Plot
plot(
  rope(diff_tda_kde_5.50.5_svm_n5_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
#bf_tda_kde_5.50.5_svm.n5_3_fold
#t_test
# Frequentist cross-check: one-sample t-test of the three per-fold accuracy
# differences against a mean of 0. The call is kept verbatim because the
# printed "data:" label echoes the argument expression.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold)
## t = 20.179, df = 2, p-value = 0.002447
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.04411192 0.06802160
## sample estimates:
## mean of x
## 0.05606676
### Test set diff
# Hold-out accuracy gap: `rf_cf_ov_acc` (defined earlier) minus the node-5
# TDA-assisted SVM test accuracy. This is a single number, so every test
# below runs on one observation.
diff_tda_kde_5.50.5_svm.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.50.5_svm.n5_test
## Accuracy
## 0.008087633
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Both probLeft and probRight are 0 here, so the ratio is 0/0 and prints NaN.
bst_tda_kde_5.50.5_svm.n5_test_odds.left <-
  bst_tda_kde_5.50.5_svm.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_svm.n5_test[["probRight"]]
bst_tda_kde_5.50.5_svm.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the result printed below is NA for all three regions,
# presumably because a spread cannot be estimated from one observation.
bct_tda_kde_5.50.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n5_test)) #bf_tda_kde_5.50.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n5_test))
#Non-TDA-Assisted
# Tuning grid for the neural network: every combination of hidden-layer size
# and weight-decay value (4 x 3 = 12 candidates).
nn1Grid <- expand.grid(
  size = c(2, 3, 5, 7),
  decay = c(0.3, 0.5, 0.7)
)
#Neural Network
# Fit the baseline (non-TDA-assisted) neural network on the one-hot training
# set: the response is `adult_df1` (as a factor), all other columns are
# predictors, and the size/decay pair is chosen from `nn1Grid` by resampled
# accuracy under `fitControl`.
# NOTE(review): `Importance` is not a named train() argument and is only
# forwarded through `...`; confirm it has any effect for this method.
# NOTE(review): most fits in the log below end with "stopped after 100
# iterations", i.e. they hit an iteration cap rather than converging;
# consider whether a larger `maxit` is needed.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 14548.581548
## final value 8389.316919
## converged
## # weights: 331
## initial value 13261.985084
## final value 8389.251955
## converged
## # weights: 551
## initial value 9505.289847
## iter 10 value 8127.909645
## iter 20 value 7705.626465
## iter 30 value 7580.565673
## iter 40 value 7390.912265
## iter 50 value 7292.717057
## iter 60 value 7241.047880
## iter 70 value 7032.607063
## iter 80 value 6474.816749
## iter 90 value 5929.154853
## iter 100 value 5774.324333
## final value 5774.324333
## stopped after 100 iterations
## # weights: 771
## initial value 8918.219637
## iter 10 value 8064.209312
## iter 20 value 7730.372203
## iter 30 value 7651.155247
## iter 40 value 7588.913980
## iter 50 value 7504.112055
## iter 60 value 7357.560443
## iter 70 value 7194.703772
## iter 80 value 6348.261745
## iter 90 value 6135.759374
## iter 100 value 5494.808818
## final value 5494.808818
## stopped after 100 iterations
## # weights: 221
## initial value 21809.731847
## iter 10 value 8410.836456
## iter 20 value 8410.138588
## iter 30 value 8297.927215
## iter 40 value 8292.596798
## iter 50 value 8292.583353
## iter 60 value 8184.729647
## iter 70 value 8134.729629
## iter 80 value 7742.215239
## iter 90 value 7714.045999
## iter 100 value 7707.307397
## final value 7707.307397
## stopped after 100 iterations
## # weights: 331
## initial value 8465.704937
## iter 10 value 8145.013208
## iter 20 value 7747.280823
## iter 30 value 7721.975702
## iter 40 value 7702.825642
## iter 50 value 7573.575868
## iter 60 value 7122.002939
## iter 70 value 6376.175773
## iter 80 value 6271.468461
## iter 90 value 6201.319576
## iter 100 value 5619.269573
## final value 5619.269573
## stopped after 100 iterations
## # weights: 551
## initial value 8565.061426
## iter 10 value 8252.002177
## iter 20 value 8246.076400
## iter 30 value 8244.190142
## iter 40 value 7687.243118
## iter 50 value 7611.299048
## iter 60 value 7469.278849
## iter 70 value 7369.824884
## iter 80 value 7351.879627
## iter 90 value 7345.879460
## iter 100 value 7342.244445
## final value 7342.244445
## stopped after 100 iterations
## # weights: 771
## initial value 12988.749828
## iter 10 value 8377.773032
## iter 20 value 7785.821091
## iter 30 value 7745.149913
## iter 40 value 7680.792232
## iter 50 value 7652.089827
## iter 60 value 7601.691086
## iter 70 value 7572.728827
## iter 80 value 7326.203781
## iter 90 value 6991.445116
## iter 100 value 6352.394906
## final value 6352.394906
## stopped after 100 iterations
## # weights: 221
## initial value 15591.118152
## iter 10 value 8314.210316
## iter 20 value 7753.179620
## iter 30 value 7728.667963
## iter 40 value 7583.602375
## iter 50 value 7483.323821
## iter 60 value 7414.527613
## iter 70 value 7345.504290
## iter 80 value 7305.037147
## iter 90 value 6902.310845
## iter 100 value 6096.738387
## final value 6096.738387
## stopped after 100 iterations
## # weights: 331
## initial value 13697.848791
## iter 10 value 8266.585251
## iter 20 value 7660.552751
## iter 30 value 7574.006118
## iter 40 value 7558.482703
## iter 50 value 7515.035583
## iter 60 value 7456.177312
## iter 70 value 7376.751322
## iter 80 value 6924.531216
## iter 90 value 6334.739091
## iter 100 value 5869.895931
## final value 5869.895931
## stopped after 100 iterations
## # weights: 551
## initial value 12280.312637
## iter 10 value 8270.262344
## iter 20 value 7808.520059
## iter 30 value 7581.215030
## iter 40 value 7541.832555
## iter 50 value 7512.732347
## iter 60 value 7446.493040
## iter 70 value 7410.335408
## iter 80 value 7346.736220
## iter 90 value 7309.492872
## iter 100 value 7206.326378
## final value 7206.326378
## stopped after 100 iterations
## # weights: 771
## initial value 9489.523994
## iter 10 value 7807.145692
## iter 20 value 7740.100381
## iter 30 value 7711.446920
## iter 40 value 7664.282349
## iter 50 value 7528.705413
## iter 60 value 7315.779895
## iter 70 value 7264.946723
## iter 80 value 7173.838741
## iter 90 value 6565.621735
## iter 100 value 5600.847686
## final value 5600.847686
## stopped after 100 iterations
## # weights: 221
## initial value 8499.650765
## iter 10 value 8238.602024
## iter 20 value 7771.578514
## iter 30 value 7632.471860
## iter 40 value 7605.350195
## iter 50 value 7597.598519
## iter 60 value 7563.136291
## iter 70 value 7361.903102
## iter 80 value 6790.728894
## iter 90 value 5909.667577
## iter 100 value 5474.174827
## final value 5474.174827
## stopped after 100 iterations
## # weights: 331
## initial value 12942.141421
## iter 10 value 8267.759111
## iter 20 value 8262.101253
## iter 30 value 8233.712392
## iter 40 value 8065.102197
## iter 50 value 7698.093867
## iter 60 value 7233.088056
## iter 70 value 6948.272049
## iter 80 value 6910.761451
## iter 90 value 6898.592899
## iter 100 value 6888.757918
## final value 6888.757918
## stopped after 100 iterations
## # weights: 551
## initial value 11219.387799
## iter 10 value 8240.885600
## iter 20 value 7871.635899
## iter 30 value 7774.781347
## iter 40 value 7774.390739
## iter 50 value 7681.143343
## iter 60 value 7624.252860
## iter 70 value 7615.506646
## iter 80 value 7604.485205
## iter 90 value 7594.308480
## iter 100 value 7581.984729
## final value 7581.984729
## stopped after 100 iterations
## # weights: 771
## initial value 10739.870293
## iter 10 value 7856.447196
## iter 20 value 7789.279797
## iter 30 value 7788.973952
## iter 40 value 7758.448143
## iter 50 value 7620.191729
## iter 60 value 7522.801656
## iter 70 value 7505.296018
## iter 80 value 7446.117130
## iter 90 value 7330.333991
## iter 100 value 7307.907366
## final value 7307.907366
## stopped after 100 iterations
## # weights: 221
## initial value 15489.573310
## final value 8388.355832
## converged
## # weights: 331
## initial value 8635.513297
## iter 10 value 8385.182909
## iter 20 value 8383.515651
## iter 30 value 7972.208962
## iter 40 value 7875.696107
## iter 50 value 7804.841942
## iter 60 value 7694.932311
## iter 70 value 7624.687267
## iter 80 value 7563.788469
## iter 90 value 7512.367949
## iter 100 value 7456.369843
## final value 7456.369843
## stopped after 100 iterations
## # weights: 551
## initial value 8686.432030
## iter 10 value 7963.183767
## iter 20 value 7783.155726
## iter 30 value 7765.480133
## iter 40 value 7671.234084
## iter 50 value 7604.563055
## iter 60 value 7550.753673
## iter 70 value 7473.729951
## iter 80 value 7418.171544
## iter 90 value 7079.497314
## iter 100 value 6930.882204
## final value 6930.882204
## stopped after 100 iterations
## # weights: 771
## initial value 12694.722565
## iter 10 value 8075.354307
## iter 20 value 7789.820625
## iter 30 value 7783.656527
## iter 40 value 7673.272790
## iter 50 value 7603.499703
## iter 60 value 7595.150039
## iter 70 value 7587.037509
## iter 80 value 7572.539040
## iter 90 value 7564.372453
## iter 100 value 7465.044407
## final value 7465.044407
## stopped after 100 iterations
## # weights: 221
## initial value 13670.393422
## iter 10 value 8331.515641
## iter 20 value 7744.884137
## iter 30 value 7643.809835
## iter 40 value 7632.506579
## iter 50 value 7454.680418
## iter 60 value 7364.113780
## iter 70 value 7132.030724
## iter 80 value 7021.731748
## iter 90 value 6837.802552
## iter 100 value 6436.400178
## final value 6436.400178
## stopped after 100 iterations
## # weights: 331
## initial value 12382.193508
## iter 10 value 8272.299664
## iter 20 value 7827.163613
## iter 30 value 7826.048907
## iter 40 value 7825.993585
## iter 50 value 7805.171497
## iter 60 value 7658.660347
## iter 70 value 7637.175610
## iter 80 value 7631.361666
## iter 90 value 7575.723483
## iter 100 value 7478.905527
## final value 7478.905527
## stopped after 100 iterations
## # weights: 551
## initial value 16791.999397
## iter 10 value 8414.418160
## iter 20 value 8248.702076
## iter 30 value 7787.173721
## iter 40 value 7654.289390
## iter 50 value 7628.116157
## iter 60 value 7527.088828
## iter 70 value 7232.888928
## iter 80 value 7099.634066
## iter 90 value 6689.108443
## iter 100 value 5859.003719
## final value 5859.003719
## stopped after 100 iterations
## # weights: 771
## initial value 13994.478018
## iter 10 value 8328.087863
## iter 20 value 7822.270077
## iter 30 value 7750.163027
## iter 40 value 7687.449672
## iter 50 value 7627.373426
## iter 60 value 7286.942350
## iter 70 value 7031.148738
## iter 80 value 6628.553989
## iter 90 value 5802.161723
## iter 100 value 5542.857066
## final value 5542.857066
## stopped after 100 iterations
## # weights: 221
## initial value 14672.488444
## iter 10 value 8164.125165
## iter 20 value 7688.946277
## iter 30 value 7616.520442
## iter 40 value 7342.798570
## iter 50 value 6450.833966
## iter 60 value 6245.025062
## iter 70 value 5787.285906
## iter 80 value 5517.461403
## iter 90 value 5208.023914
## iter 100 value 5026.306172
## final value 5026.306172
## stopped after 100 iterations
## # weights: 331
## initial value 12024.613929
## iter 10 value 8338.173788
## iter 20 value 8199.674095
## iter 30 value 8106.100266
## iter 40 value 7160.752356
## iter 50 value 6392.534716
## iter 60 value 6007.107484
## iter 70 value 5366.728460
## iter 80 value 5217.464825
## iter 90 value 5025.400916
## iter 100 value 4919.841071
## final value 4919.841071
## stopped after 100 iterations
## # weights: 551
## initial value 11085.459292
## iter 10 value 8367.183302
## iter 20 value 7774.887022
## iter 30 value 7739.263226
## iter 40 value 7697.478804
## iter 50 value 7638.123535
## iter 60 value 7591.789950
## iter 70 value 7422.788270
## iter 80 value 7367.885736
## iter 90 value 7305.789110
## iter 100 value 6651.088618
## final value 6651.088618
## stopped after 100 iterations
## # weights: 771
## initial value 19249.130209
## iter 10 value 7980.045537
## iter 20 value 7670.528687
## iter 30 value 7638.017013
## iter 40 value 7624.844120
## iter 50 value 7582.687354
## iter 60 value 7573.851782
## iter 70 value 7544.734345
## iter 80 value 7386.111523
## iter 90 value 7291.907496
## iter 100 value 7121.478671
## final value 7121.478671
## stopped after 100 iterations
## # weights: 221
## initial value 8639.460141
## iter 10 value 8301.241185
## iter 10 value 8301.241173
## iter 20 value 7962.585510
## iter 30 value 7776.974757
## iter 40 value 7770.244305
## iter 50 value 7742.934863
## iter 60 value 7733.338705
## iter 70 value 7602.904125
## iter 80 value 7516.883837
## iter 90 value 7446.720399
## iter 100 value 7411.933641
## final value 7411.933641
## stopped after 100 iterations
## # weights: 331
## initial value 9536.561339
## iter 10 value 7970.869683
## iter 20 value 7772.755878
## iter 30 value 7694.966374
## iter 40 value 7594.404951
## iter 50 value 7577.875038
## iter 60 value 7480.075042
## iter 70 value 7443.390754
## iter 80 value 7394.417988
## iter 90 value 6782.059724
## iter 100 value 6374.681601
## final value 6374.681601
## stopped after 100 iterations
## # weights: 551
## initial value 15873.186513
## iter 10 value 8387.363549
## iter 20 value 8387.104816
## iter 30 value 8319.206589
## iter 40 value 7810.321295
## iter 50 value 7626.368750
## iter 60 value 7565.435314
## iter 70 value 7558.174288
## iter 80 value 7553.399614
## iter 90 value 7546.758136
## iter 100 value 7543.088293
## final value 7543.088293
## stopped after 100 iterations
## # weights: 771
## initial value 8917.377732
## iter 10 value 7787.962320
## iter 20 value 7760.112612
## iter 30 value 7750.537740
## iter 40 value 7712.663459
## iter 50 value 7696.249040
## iter 60 value 7687.770799
## iter 70 value 7613.744379
## iter 80 value 7554.089556
## iter 90 value 7543.477060
## iter 100 value 7517.258437
## final value 7517.258437
## stopped after 100 iterations
## # weights: 221
## initial value 9647.221252
## iter 10 value 8386.589990
## iter 20 value 8058.198077
## iter 30 value 7751.011465
## iter 40 value 7749.377947
## final value 7749.375313
## converged
## # weights: 331
## initial value 18626.322738
## iter 10 value 8321.418177
## iter 20 value 7799.316733
## iter 30 value 7798.363312
## iter 40 value 7791.229522
## iter 50 value 7775.263505
## iter 60 value 7745.860450
## iter 70 value 7644.170840
## iter 80 value 7210.975003
## iter 90 value 6862.677711
## iter 100 value 6763.356817
## final value 6763.356817
## stopped after 100 iterations
## # weights: 551
## initial value 8514.288357
## iter 10 value 7869.534517
## iter 20 value 7683.902058
## iter 30 value 7615.059347
## iter 40 value 6952.890684
## iter 50 value 6816.948749
## iter 60 value 6225.955986
## iter 70 value 5864.588035
## iter 80 value 5681.086302
## iter 90 value 5326.577845
## iter 100 value 5114.818776
## final value 5114.818776
## stopped after 100 iterations
## # weights: 771
## initial value 14830.129964
## iter 10 value 8265.106488
## iter 20 value 7778.982903
## iter 30 value 7634.887154
## iter 40 value 7518.686713
## iter 50 value 7480.384417
## iter 60 value 7462.568754
## iter 70 value 7451.279956
## iter 80 value 7422.492853
## iter 90 value 7407.370561
## iter 100 value 7343.403036
## final value 7343.403036
## stopped after 100 iterations
## # weights: 771
## initial value 27434.686025
## iter 10 value 12570.229169
## iter 20 value 12126.117754
## iter 30 value 11550.474052
## iter 40 value 11459.097934
## iter 50 value 11449.742511
## iter 60 value 11435.954634
## iter 70 value 11393.210999
## iter 80 value 11239.558484
## iter 90 value 11127.231605
## iter 100 value 10976.538999
## final value 10976.538999
## stopped after 100 iterations
# Show the tuning summary of the baseline (non-TDA) neural network:
# cross-validated Accuracy and Kappa for every size/decay combination tried,
# plus the combination that was finally selected.
adultNn1Fit
## Neural Network
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8117381 0.3778008
## 2 0.5 0.7830478 0.1539123
## 2 0.7 0.7978325 0.2555682
## 3 0.3 0.8019549 0.3189614
## 3 0.5 0.8155145 0.4303865
## 3 0.7 0.8037126 0.3301381
## 5 0.3 0.8107314 0.4435443
## 5 0.5 0.7986223 0.2965459
## 5 0.7 0.8165657 0.4136638
## 7 0.3 0.8153830 0.3923103
## 7 0.5 0.7965602 0.2435871
## 7 0.7 0.8272724 0.4685915
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.7.
# Show the per-fold Accuracy and Kappa recorded for the selected model.
adultNn1Fit$resample
## Accuracy Kappa Resample
## 1 0.8286391 0.5271645 Fold2
## 2 0.8462551 0.5525540 Fold1
## 3 0.8069229 0.3260560 Fold3
# Keep the per-fold accuracies as a one-column data frame; the column is
# picked by name, which is the first column of the resample table.
ad_nn1_fit_re <- adultNn1Fit[["resample"]]["Accuracy"]
# Print the architecture and fitted weights of the selected network.
summary(adultNn1Fit)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -0.97 -0.29 -0.63 0.49 -0.05 0.01 -0.32 -0.29
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.46 0.31 -0.04 0.00 -0.11 -0.39 -0.17 0.06
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.18 -0.08 -0.07 -0.27 0.07 0.43 0.57 0.05
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.90 -0.04 0.14 -2.24 0.11 -0.94 -0.06 1.21
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.03 0.09 0.28 -1.58 -0.62 0.18 0.01 0.84
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.05 -0.08 0.11 0.05 -1.43 -0.14 0.63 -0.36
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.35 -0.08 0.31 1.19 0.00 0.01 -1.82 -0.31
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## -0.03 -0.10 0.00 0.15 0.13 -1.16 -1.37 0.40
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.01 0.01 0.42 0.03 -0.03 -0.10 -0.07 0.04
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## -0.13 -0.01 0.00 0.04 0.02 0.01 0.02 -0.03
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.10 0.05 0.00 0.01 0.01 -0.04 0.08 -0.03
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.03 0.10 0.01 0.03 0.04 0.72 0.01 0.02
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.01 -0.14 0.11 0.03 0.07 0.02 0.06 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 -0.01 -1.98 -0.05 -0.03
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 -0.10 0.00 0.00 0.00 0.00 0.00 -0.01
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 -0.06 0.00 0.00 -0.01
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## -0.01 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 -0.01 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## -0.29 0.22 -0.17 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 1.09 0.23 0.15 0.48 -0.11 0.00 -0.05 -0.49
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.32 0.68 0.11 0.00 0.46 -0.03 -0.21 -0.07
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.24 0.01 -0.29 -0.05 1.29 -1.21 -0.07 0.92
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## -0.87 0.00 -0.33 1.30 0.92 1.11 0.00 -1.10
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.46 -0.08 0.69 0.15 1.69 0.00 -0.60
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## -0.97 0.31 0.76 -0.53 0.49 -0.07 0.36 0.02
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## -0.77 0.24 0.01 -2.02 1.51 -0.01 -0.34 1.01
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.92 0.01 -0.34 0.27 0.01 1.13 2.77 -1.68
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## -0.01 0.00 0.51 0.16 0.00 0.10 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 -0.07 0.02 0.00 0.00 0.13 -0.01
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 -0.07 0.00 -0.37 0.00 0.00 0.01 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.01 0.00 0.00 -0.23 -0.02 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 1.43 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## 0.67 0.88 0.67 0.00 0.66 2.52 -4.53 0.66
# Variable-importance plot for the baseline network, limited to the 25
# highest-ranked predictors. The title marks it as the model trained without
# TDA assistance (spelling of "Assisted" corrected).
vip(adultNn1Fit, num_features = 25) + ggtitle("non-TDA-Assisted NN")

# Score the test set with the tuned baseline network.
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed labels to assess test performance.
# No `positive` class is passed, so the default is used; the recorded output
# reports ' <=50K' as the positive class.
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7388 1888
## >50K 28 464
##
## Accuracy : 0.8038
## 95% CI : (0.7958, 0.8117)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2651
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9962
## Specificity : 0.1973
## Pos Pred Value : 0.7965
## Neg Pred Value : 0.9431
## Prevalence : 0.7592
## Detection Rate : 0.7563
## Detection Prevalence : 0.9496
## Balanced Accuracy : 0.5968
##
## 'Positive' Class : <=50K
##
# Show the overall test-set statistics (accuracy, kappa, accuracy interval
# bounds and the associated p-values).
nn1_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.038493e-01 2.650743e-01 7.958334e-01 8.116836e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.707074e-26 0.000000e+00
# Store the overall test accuracy; it is the first entry of `overall` and is
# selected here by its name.
nn1_cf_ov_acc <- nn1_cf[["overall"]]["Accuracy"]
# Show the per-class statistics (sensitivity, specificity, precision, ...).
nn1_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9962244 0.1972789 0.7964640
## Neg Pred Value Precision Recall
## 0.9430894 0.7964640 0.9962244
## F1 Prevalence Detection Rate
## 0.8852145 0.7592138 0.7563473
## Detection Prevalence Balanced Accuracy
## 0.9496314 0.5967516
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`), selected by name.
nn1_cf_pre_rec_f1 <- nn1_cf[["byClass"]][c("Precision", "Recall", "F1")]
## With TDA: PCA filter, 5 intervals, 50% overlap, 5 bins
## Node 1
# Neural network 1: tune an nnet classifier on the node-1 data, using the same
# resampling control (`fitControl`) and tuning grid (`nn1Grid`) objects that
# are defined earlier in the file, and select the model by Accuracy.
Adult_TDA_PC_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  # NOTE(review): `Importance` (capital I) does not appear to be a documented
  # argument of train() or nnet(); it is presumably forwarded through `...`
  # and ignored. Confirm and drop it if so. `TRUE` replaces the reassignable
  # shorthand `T`.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 2836.907945
## iter 10 value 518.577253
## iter 20 value 403.625430
## iter 30 value 403.044180
## iter 40 value 402.860008
## final value 402.859989
## converged
## # weights: 331
## initial value 2539.186774
## iter 10 value 405.303581
## iter 20 value 403.301552
## iter 30 value 402.542923
## iter 40 value 402.541618
## final value 402.541507
## converged
## # weights: 551
## initial value 4712.689662
## iter 10 value 403.473836
## iter 20 value 397.173548
## iter 30 value 395.472907
## iter 40 value 395.003181
## iter 50 value 394.950209
## iter 60 value 394.217672
## iter 70 value 393.924533
## iter 80 value 393.648611
## iter 90 value 393.520366
## final value 393.519927
## converged
## # weights: 771
## initial value 2931.777566
## iter 10 value 402.671812
## iter 20 value 402.066953
## iter 30 value 395.005035
## iter 40 value 394.994288
## iter 50 value 394.969819
## iter 60 value 394.777523
## iter 70 value 394.705355
## iter 80 value 394.701043
## iter 80 value 394.701040
## iter 80 value 394.701036
## final value 394.701036
## converged
## # weights: 221
## initial value 1455.279669
## iter 10 value 433.989726
## iter 20 value 404.791959
## iter 30 value 404.790217
## final value 404.790209
## converged
## # weights: 331
## initial value 3201.278100
## iter 10 value 407.979300
## iter 20 value 406.238653
## iter 30 value 403.007701
## iter 40 value 399.481792
## iter 50 value 398.147614
## iter 60 value 397.984646
## iter 70 value 397.981722
## iter 80 value 397.972888
## iter 90 value 397.969887
## iter 90 value 397.969885
## iter 90 value 397.969883
## final value 397.969883
## converged
## # weights: 551
## initial value 1055.738991
## iter 10 value 424.133975
## iter 20 value 403.285895
## iter 30 value 396.873742
## iter 40 value 395.261660
## iter 50 value 395.236942
## final value 395.229171
## converged
## # weights: 771
## initial value 3248.226213
## iter 10 value 466.258805
## iter 20 value 405.961583
## iter 30 value 401.652375
## iter 40 value 387.471252
## iter 50 value 370.396571
## iter 60 value 364.811836
## iter 70 value 322.882505
## iter 80 value 270.645948
## iter 90 value 266.268129
## iter 100 value 263.695663
## final value 263.695663
## stopped after 100 iterations
## # weights: 221
## initial value 2966.230897
## iter 10 value 408.565988
## iter 20 value 403.603799
## iter 30 value 398.868543
## iter 40 value 398.772568
## iter 50 value 398.730984
## iter 60 value 398.550217
## final value 398.549188
## converged
## # weights: 331
## initial value 1613.762595
## iter 10 value 408.606624
## iter 20 value 400.095618
## iter 30 value 398.647911
## iter 40 value 398.454091
## iter 50 value 397.262723
## iter 60 value 397.169852
## final value 397.167410
## converged
## # weights: 551
## initial value 2174.968963
## iter 10 value 633.720298
## iter 20 value 404.612355
## iter 30 value 402.623205
## iter 40 value 397.744602
## iter 50 value 397.705135
## iter 60 value 397.675471
## iter 70 value 396.501009
## iter 80 value 396.084661
## iter 90 value 396.082717
## final value 396.082677
## converged
## # weights: 771
## initial value 566.958583
## iter 10 value 407.754307
## iter 20 value 403.177084
## iter 30 value 403.048192
## iter 40 value 403.039149
## iter 50 value 402.995478
## iter 60 value 402.709998
## iter 70 value 397.747754
## iter 80 value 395.613532
## final value 395.612123
## converged
## # weights: 221
## initial value 3356.511421
## iter 10 value 404.689115
## iter 20 value 404.647264
## iter 30 value 394.040882
## iter 40 value 393.920409
## iter 50 value 393.227123
## iter 60 value 374.176660
## iter 70 value 339.811266
## iter 80 value 321.040164
## iter 90 value 274.329740
## iter 100 value 255.499087
## final value 255.499087
## stopped after 100 iterations
## # weights: 331
## initial value 1706.736790
## iter 10 value 406.872935
## iter 20 value 398.535879
## iter 30 value 393.592673
## iter 40 value 390.510024
## iter 50 value 387.405058
## iter 60 value 386.477134
## iter 70 value 386.469270
## iter 80 value 385.874675
## iter 90 value 382.478654
## iter 100 value 377.713869
## final value 377.713869
## stopped after 100 iterations
## # weights: 551
## initial value 1134.810330
## iter 10 value 420.210900
## iter 20 value 402.548055
## iter 30 value 402.537683
## iter 40 value 400.719326
## iter 50 value 399.990642
## iter 60 value 394.688728
## iter 70 value 390.073144
## iter 80 value 381.195611
## iter 90 value 375.793504
## iter 100 value 363.620097
## final value 363.620097
## stopped after 100 iterations
## # weights: 771
## initial value 3766.253548
## iter 10 value 402.780399
## iter 20 value 402.131264
## iter 30 value 399.966328
## iter 40 value 395.100500
## iter 50 value 390.733489
## iter 60 value 385.698727
## iter 70 value 385.366525
## iter 80 value 382.204517
## iter 90 value 379.865764
## iter 100 value 378.623991
## final value 378.623991
## stopped after 100 iterations
## # weights: 221
## initial value 2423.032008
## iter 10 value 412.544656
## iter 20 value 410.688403
## iter 30 value 388.431592
## iter 40 value 311.130345
## iter 50 value 280.771948
## iter 60 value 277.822360
## iter 70 value 272.977846
## iter 80 value 260.203148
## iter 90 value 257.582246
## iter 100 value 255.507047
## final value 255.507047
## stopped after 100 iterations
## # weights: 331
## initial value 4013.634672
## iter 10 value 407.979398
## iter 20 value 404.917367
## iter 30 value 401.736345
## iter 40 value 396.399543
## iter 50 value 394.002718
## iter 60 value 379.758636
## iter 70 value 283.702370
## iter 80 value 267.984108
## iter 90 value 260.938781
## iter 100 value 260.735785
## final value 260.735785
## stopped after 100 iterations
## # weights: 551
## initial value 3930.893896
## iter 10 value 591.908271
## iter 20 value 553.824710
## iter 30 value 513.427847
## iter 40 value 340.423225
## iter 50 value 290.630860
## iter 60 value 272.720562
## iter 70 value 260.690189
## iter 80 value 256.572845
## iter 90 value 255.416173
## iter 100 value 255.265274
## final value 255.265274
## stopped after 100 iterations
## # weights: 771
## initial value 1254.322447
## iter 10 value 451.018846
## iter 20 value 415.528655
## iter 30 value 402.120559
## iter 40 value 394.088544
## iter 50 value 393.818793
## iter 60 value 392.721256
## iter 70 value 390.407120
## iter 80 value 389.895820
## iter 90 value 389.797848
## iter 100 value 389.787031
## final value 389.787031
## stopped after 100 iterations
## # weights: 221
## initial value 1392.425628
## iter 10 value 411.483201
## iter 20 value 408.742637
## iter 30 value 407.945204
## iter 40 value 396.984244
## iter 50 value 391.058888
## iter 60 value 385.915573
## iter 70 value 379.012578
## iter 80 value 298.322581
## iter 90 value 277.878148
## iter 100 value 276.859115
## final value 276.859115
## stopped after 100 iterations
## # weights: 331
## initial value 1382.859021
## iter 10 value 407.370061
## iter 20 value 407.176565
## iter 30 value 395.899627
## iter 40 value 395.706723
## iter 50 value 395.700806
## iter 60 value 395.541353
## iter 70 value 395.470607
## iter 80 value 394.649707
## iter 90 value 390.892484
## iter 100 value 373.061883
## final value 373.061883
## stopped after 100 iterations
## # weights: 551
## initial value 2149.580222
## iter 10 value 417.058408
## iter 20 value 408.582786
## iter 30 value 406.106868
## iter 40 value 402.964728
## iter 50 value 390.833017
## iter 60 value 353.044941
## iter 70 value 295.642414
## iter 80 value 287.149724
## iter 90 value 277.517982
## iter 100 value 276.375759
## final value 276.375759
## stopped after 100 iterations
## # weights: 771
## initial value 2693.848887
## iter 10 value 410.775132
## iter 20 value 400.429601
## iter 30 value 395.116441
## iter 40 value 393.995829
## iter 50 value 393.371924
## iter 60 value 390.253023
## iter 70 value 384.261623
## iter 80 value 373.482905
## iter 90 value 329.328634
## iter 100 value 284.222265
## final value 284.222265
## stopped after 100 iterations
## # weights: 221
## initial value 2841.927590
## iter 10 value 408.190379
## iter 20 value 407.310835
## iter 30 value 407.260053
## iter 40 value 406.454889
## iter 50 value 406.447611
## final value 406.447584
## converged
## # weights: 331
## initial value 2308.684339
## iter 10 value 410.933839
## iter 20 value 399.975846
## iter 30 value 399.540579
## iter 40 value 399.383150
## iter 50 value 399.034432
## iter 60 value 398.996703
## final value 398.996451
## converged
## # weights: 551
## initial value 749.565389
## iter 10 value 401.975568
## iter 20 value 399.018956
## iter 30 value 398.477992
## iter 40 value 398.120247
## iter 50 value 397.796948
## iter 60 value 397.697198
## final value 397.692517
## converged
## # weights: 771
## initial value 890.105510
## iter 10 value 403.465952
## iter 20 value 398.161924
## iter 30 value 393.152954
## iter 40 value 383.242419
## iter 50 value 333.945381
## iter 60 value 331.077253
## iter 70 value 308.799322
## iter 80 value 284.545570
## iter 90 value 262.605228
## iter 100 value 257.735506
## final value 257.735506
## stopped after 100 iterations
## # weights: 221
## initial value 3439.596740
## iter 10 value 410.763435
## iter 20 value 403.121717
## final value 401.682198
## converged
## # weights: 331
## initial value 1600.342938
## iter 10 value 407.648564
## iter 20 value 398.613043
## iter 30 value 397.058136
## iter 40 value 395.789353
## iter 50 value 389.360849
## iter 60 value 364.217666
## iter 70 value 327.683252
## iter 80 value 316.508613
## iter 90 value 305.235095
## iter 100 value 272.994455
## final value 272.994455
## stopped after 100 iterations
## # weights: 551
## initial value 1201.150165
## iter 10 value 405.463221
## iter 20 value 400.001155
## iter 30 value 398.574829
## iter 40 value 398.034985
## iter 50 value 397.887576
## iter 60 value 397.813364
## iter 70 value 397.775824
## iter 80 value 396.312466
## iter 90 value 390.872495
## iter 100 value 386.625450
## final value 386.625450
## stopped after 100 iterations
## # weights: 771
## initial value 3402.428895
## iter 10 value 407.503560
## iter 20 value 405.495424
## iter 30 value 399.295854
## iter 40 value 397.962241
## iter 50 value 397.409165
## iter 60 value 392.257880
## iter 70 value 389.429798
## iter 80 value 386.442411
## iter 90 value 368.839667
## iter 100 value 360.149440
## final value 360.149440
## stopped after 100 iterations
## # weights: 221
## initial value 2951.195531
## iter 10 value 420.660571
## iter 20 value 419.952911
## iter 30 value 414.570170
## iter 40 value 399.078938
## iter 50 value 388.716985
## iter 60 value 363.208753
## iter 70 value 310.747471
## iter 80 value 290.283893
## iter 90 value 283.416502
## iter 100 value 282.125804
## final value 282.125804
## stopped after 100 iterations
## # weights: 331
## initial value 2621.010713
## iter 10 value 420.365203
## iter 20 value 410.215185
## iter 30 value 400.435176
## iter 40 value 395.693200
## iter 50 value 392.675972
## iter 60 value 382.449067
## iter 70 value 323.143153
## iter 80 value 307.805747
## iter 90 value 294.083212
## iter 100 value 286.879003
## final value 286.879003
## stopped after 100 iterations
## # weights: 551
## initial value 1289.204100
## iter 10 value 416.159932
## iter 20 value 413.539686
## iter 30 value 407.629227
## iter 40 value 407.356715
## iter 50 value 406.084117
## iter 60 value 399.956908
## iter 70 value 388.512638
## iter 80 value 334.575048
## iter 90 value 310.447315
## iter 100 value 306.738634
## final value 306.738634
## stopped after 100 iterations
## # weights: 771
## initial value 3498.777832
## iter 10 value 432.466094
## iter 20 value 424.403960
## iter 30 value 404.384914
## iter 40 value 400.376202
## iter 50 value 399.248312
## iter 60 value 395.211162
## iter 70 value 393.532939
## iter 80 value 387.495041
## iter 90 value 383.189974
## iter 100 value 381.227176
## final value 381.227176
## stopped after 100 iterations
## # weights: 221
## initial value 4197.036092
## iter 10 value 607.377587
## iter 20 value 607.359193
## iter 30 value 605.018445
## iter 40 value 594.296585
## iter 50 value 564.919586
## iter 60 value 534.098525
## iter 70 value 427.187659
## iter 80 value 396.090599
## iter 90 value 394.696603
## iter 100 value 394.479929
## final value 394.479929
## stopped after 100 iterations
# Print the fitted node-1 model: resampling results per tuning combination
# and the size/decay values that were selected.
Adult_TDA_PC_5.50.5_n1_NN1Fit0
## Neural Network
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3278, 3278, 3278
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9733577 0.068380213
## 2 0.5 0.9741712 0.054307116
## 2 0.7 0.9733577 0.000000000
## 3 0.3 0.9733577 0.000000000
## 3 0.5 0.9731544 0.027224806
## 3 0.7 0.9733577 0.000000000
## 5 0.3 0.9733577 0.000000000
## 5 0.5 0.9741712 0.054307116
## 5 0.7 0.9733577 0.000000000
## 7 0.3 0.9733577 0.000000000
## 7 0.5 0.9727476 -0.001146296
## 7 0.7 0.9733577 0.000000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold Accuracy and Kappa stored on the node-1 fit.
Adult_TDA_PC_5.50.5_n1_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9737645 0.0000000 Fold3
## 2 0.9755949 0.1629213 Fold2
## 3 0.9731544 0.0000000 Fold1
# Keep the per-fold accuracies as a one-column data frame for the fold-wise
# comparison further down. The column is selected by name instead of by
# position so the extraction does not depend on the column order of `resample`.
ad_tda_pc_5.50.5_n1_nn1_fit_re <- Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample["Accuracy"]
# Print the weights of the selected network.
summary(Adult_TDA_PC_5.50.5_n1_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.03 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.09 0.00 0.01 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 2.33 -0.06 0.82 0.35 0.42 0.00 1.56 -0.81
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.69 0.68 0.00 0.00 0.17 0.30 0.24 0.01
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.14 0.73 0.25 -0.08 0.39 -0.19 -1.38 1.30
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.47 0.00 -0.55 1.47 -0.08 0.35 0.24 1.74
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.82 0.65 0.01 -0.40
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.21 -0.28 0.07 0.38 0.18 0.00 -0.25 0.53
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.70 0.48 -0.34 0.75 0.41 0.14 0.00 0.06
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.96 0.31 0.42 0.93 0.27 0.39 0.96 1.37
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 -0.05 0.59 0.00 0.20 -0.61 0.19
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.50 0.00 0.00 0.13 0.43 0.19 0.47 0.13
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.01 0.02 -0.64 -0.77
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.02 0.24 0.00 0.06 0.01 0.32 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.14 0.04 0.06 0.06 0.17 -0.01 0.62
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.04 0.00 -0.32 0.00 0.01
## b->o h1->o h2->o
## 0.28 -0.17 6.62
# Variable-importance plot limited to 25 features.
vip(Adult_TDA_PC_5.50.5_n1_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n1_NN1Fit TDA-Assisted NN")

# Predict the test set with the node-1 model.
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_NN1Fit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of the predictions against the test labels.
# NOTE(review): the recorded output places every test row in a single
# predicted class (accuracy 0.24) although cross-validated accuracy is ~0.97;
# check that the test columns/encoding match the node-1 training data.
ad_tda_pc_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the output is
# identical to the print directly above, so this repeat can probably be dropped.
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy interval, p-values).
ad_tda_pc_5.50.5_n1_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Store the test-set accuracy; picked by name rather than by position so the
# value does not depend on the ordering of the `overall` vector.
ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_nn1_cf0$overall["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_pc_5.50.5_n1_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
# Keep Precision, Recall and F1 (selected by name instead of positions 5:7).
ad_tda_pc_5.50.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n1_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Row-wise difference of the two one-column accuracy data frames.
# NOTE(review): the subtraction pairs rows by position, and the resample table
# of this fit is listed as Fold3, Fold2, Fold1 — confirm `ad_nn1_fit_re` uses
# the same row order before reading these as per-fold differences.
diff_tda_pca_5.50.5_nn1_n1_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.50.5_n1_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n1_3_fold
## Accuracy
## 1 -0.1451254
## 2 -0.1293398
## 3 -0.1662315
## Bayesian tests on the 3-fold differences
# Bayesian sign test; the last two arguments are the limits -0.01 and 0.01.
bst_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test (probLeft / probRight).
# The ratio is Inf when probRight is 0, as in the recorded output.
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences and limits.
bsr_tda_pca_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9912333
##
## $winRope
## [1] 0.008766667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Arguments after the difference matrix: 0.1, then the limits -0.01 and 0.01.
# NOTE(review): if the 0.1 is the between-fold correlation (rho), 3-fold
# cross-validation would normally use 1/3 (the test fraction); 0.1 corresponds
# to 10-fold. Confirm against the helper's definition before relying on the
# probabilities printed below.
bct_tda_pca_5.50.5_nn1.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.9959864
##
## $rope
## [1] 0.0009492257
##
## $right
## [1] 0.003064356
# ROPE plot of the fold differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_nn1_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
#bf_tda_pca_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
#bf_tda_pca_5.50.5_nn1.n1_3_fold
# Classical one-sample t-test of the fold differences against zero. The
# argument expression is kept inline because it is echoed in the "data:" line.
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold)
## t = -13.746, df = 2, p-value = 0.005251
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1928795 -0.1009183
## sample estimates:
## mean of x
## -0.1468989
### Test-set difference
# `nn1_cf_ov_acc` (presumably the non-TDA NN test accuracy — verify) minus the
# node-1 TDA-assisted NN test accuracy; a single named value.
diff_tda_pca_5.50.5_nn1.n1_test <- nn1_cf_ov_acc - ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n1_test
## Accuracy
## 0.5630631
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is the single test-set difference.
bst_tda_pca_5.50.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set sign test
# (probLeft / probRight).
bst_tda_pca_5.50.5_nn1.n1_test_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.50.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1593667
##
## $winRight
## [1] 0.8406333
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably because one observation has
# no sample variance. Consider dropping this call for the test set.
bct_tda_pca_5.50.5_nn1.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# The steps below are left disabled; the test-set difference is a single value.
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n1_test))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))
#bf_tda_pca_5.50.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))
## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## Node 2
## Earlier direct nnet() call, kept for reference (disabled):
##Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, size=2, range = 0.6,, type='class')
# Neural Network 1: tune an nnet classifier on the node-2 data, using the
# resampling setup in `fitControl` and the tuning grid in `nn1Grid`; the best
# combination is chosen by accuracy.
# NOTE(review): the trace below ends most fits with "stopped after 100
# iterations"; consider passing a larger `maxit` so the fits can converge.
# NOTE(review): `Importance` is not a named train() argument and is forwarded
# through `...` — verify that the nnet fit actually uses it.
Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8501.489925
## iter 10 value 5602.512076
## iter 20 value 5479.563941
## iter 30 value 5242.987078
## iter 40 value 5224.069749
## iter 50 value 5197.250019
## iter 60 value 5182.016418
## iter 70 value 5154.983647
## iter 80 value 5106.521450
## iter 90 value 4995.575971
## iter 100 value 4719.347351
## final value 4719.347351
## stopped after 100 iterations
## # weights: 331
## initial value 5674.318127
## iter 10 value 5606.335713
## iter 20 value 5541.955405
## iter 30 value 5393.046334
## iter 40 value 5236.529370
## iter 50 value 5181.658157
## iter 60 value 5147.881376
## iter 70 value 4899.259728
## iter 80 value 4559.931261
## iter 90 value 4463.006167
## iter 100 value 4397.988800
## final value 4397.988800
## stopped after 100 iterations
## # weights: 551
## initial value 6148.593218
## iter 10 value 5367.581876
## iter 20 value 5301.948740
## iter 30 value 5239.090475
## iter 40 value 5219.942302
## iter 50 value 5190.334687
## iter 60 value 5165.238147
## iter 70 value 5145.292867
## iter 80 value 5124.977846
## iter 90 value 5094.394583
## iter 100 value 4939.138367
## final value 4939.138367
## stopped after 100 iterations
## # weights: 771
## initial value 7630.352146
## iter 10 value 5566.475071
## iter 20 value 5348.243462
## iter 30 value 5338.628861
## iter 40 value 5333.505802
## iter 50 value 5327.435567
## iter 60 value 5318.156729
## iter 70 value 5289.030076
## iter 80 value 5250.152031
## iter 90 value 5188.217635
## iter 100 value 4967.599152
## final value 4967.599152
## stopped after 100 iterations
## # weights: 221
## initial value 5688.532556
## iter 10 value 5603.851503
## iter 20 value 5587.764128
## iter 30 value 5586.467014
## iter 40 value 5586.229355
## iter 50 value 5476.331379
## iter 60 value 5341.794144
## iter 70 value 5319.639780
## iter 80 value 5317.164490
## iter 90 value 5317.067232
## iter 100 value 5317.052079
## final value 5317.052079
## stopped after 100 iterations
## # weights: 331
## initial value 5924.103932
## iter 10 value 5564.082832
## iter 20 value 5549.278100
## iter 30 value 5352.254079
## iter 40 value 5316.628910
## iter 50 value 5264.582555
## iter 60 value 5236.127171
## iter 70 value 5213.616614
## iter 80 value 5205.933433
## iter 90 value 5163.496211
## iter 100 value 5122.296066
## final value 5122.296066
## stopped after 100 iterations
## # weights: 551
## initial value 5682.784565
## iter 10 value 5590.139938
## iter 20 value 5487.890996
## iter 30 value 5461.644350
## iter 40 value 5383.205058
## iter 50 value 5263.092977
## iter 60 value 4784.923006
## iter 70 value 4574.033354
## iter 80 value 4453.232250
## iter 90 value 4445.492375
## iter 100 value 4428.645044
## final value 4428.645044
## stopped after 100 iterations
## # weights: 771
## initial value 5678.307466
## iter 10 value 5563.283208
## iter 20 value 5320.774287
## iter 30 value 5312.164702
## iter 40 value 5295.506320
## iter 50 value 5289.275912
## iter 60 value 5280.127461
## iter 70 value 5273.647246
## iter 80 value 5263.023222
## iter 90 value 5239.110643
## iter 100 value 5157.602774
## final value 5157.602774
## stopped after 100 iterations
## # weights: 221
## initial value 5635.539123
## iter 10 value 5591.803146
## iter 20 value 5533.626598
## iter 30 value 5533.604290
## iter 40 value 5282.219255
## iter 50 value 5257.652555
## iter 60 value 5204.959304
## iter 70 value 5193.228398
## iter 80 value 5136.200003
## iter 90 value 4947.496546
## iter 100 value 4657.075371
## final value 4657.075371
## stopped after 100 iterations
## # weights: 331
## initial value 6791.111388
## iter 10 value 5381.993847
## iter 20 value 5284.613384
## iter 30 value 5203.971963
## iter 40 value 5172.626133
## iter 50 value 5076.857923
## iter 60 value 4779.158131
## iter 70 value 4655.033896
## iter 80 value 4635.293455
## iter 90 value 4467.745351
## iter 100 value 4393.977069
## final value 4393.977069
## stopped after 100 iterations
## # weights: 551
## initial value 6805.592754
## iter 10 value 5454.440697
## iter 20 value 5340.105906
## iter 30 value 5332.934276
## iter 40 value 5271.119800
## iter 50 value 5265.824221
## iter 60 value 5263.743045
## iter 70 value 5259.949051
## iter 80 value 5253.410759
## final value 5252.717326
## converged
## # weights: 771
## initial value 5923.382873
## iter 10 value 5359.866895
## iter 20 value 5283.407851
## iter 30 value 5249.501355
## iter 40 value 5219.752243
## iter 50 value 5185.610500
## iter 60 value 5143.071181
## iter 70 value 5118.664645
## iter 80 value 5104.569192
## iter 90 value 5100.524964
## iter 100 value 5089.128876
## final value 5089.128876
## stopped after 100 iterations
## # weights: 221
## initial value 6476.846192
## iter 10 value 5384.328284
## iter 20 value 5340.451463
## iter 30 value 5196.106782
## iter 40 value 5179.814870
## iter 50 value 5157.067101
## iter 60 value 5141.963942
## iter 70 value 5135.948469
## iter 80 value 5127.193365
## iter 90 value 5076.104933
## iter 100 value 5008.686911
## final value 5008.686911
## stopped after 100 iterations
## # weights: 331
## initial value 5635.852909
## iter 10 value 5482.846950
## iter 20 value 5451.801758
## iter 30 value 5404.050079
## iter 40 value 5342.856386
## iter 50 value 5218.146743
## iter 60 value 5120.473739
## iter 70 value 4747.373284
## iter 80 value 4609.670456
## iter 90 value 4434.959304
## iter 100 value 4337.784229
## final value 4337.784229
## stopped after 100 iterations
## # weights: 551
## initial value 5692.181753
## iter 10 value 5390.494890
## iter 20 value 5370.194507
## iter 30 value 5346.748222
## iter 40 value 5333.762368
## iter 50 value 5322.913535
## iter 60 value 5215.907106
## iter 70 value 5185.954248
## iter 80 value 5174.562913
## iter 90 value 5166.894689
## iter 100 value 5152.796790
## final value 5152.796790
## stopped after 100 iterations
## # weights: 771
## initial value 5683.084985
## iter 10 value 5608.586977
## iter 20 value 5606.497741
## iter 30 value 5354.192673
## iter 40 value 5351.495641
## iter 50 value 5351.460618
## iter 60 value 5350.751976
## iter 70 value 5340.737616
## iter 80 value 5312.617355
## iter 90 value 5155.574615
## iter 100 value 5110.736246
## final value 5110.736246
## stopped after 100 iterations
## # weights: 221
## initial value 5751.862863
## iter 10 value 5538.622789
## iter 20 value 5356.702283
## iter 30 value 5350.287860
## iter 40 value 5338.920565
## iter 50 value 5333.962313
## iter 60 value 5333.331168
## iter 70 value 5315.918581
## iter 80 value 5227.564705
## iter 90 value 5184.072379
## iter 100 value 5108.277229
## final value 5108.277229
## stopped after 100 iterations
## # weights: 331
## initial value 5777.695921
## iter 10 value 5392.053482
## iter 20 value 5226.299885
## iter 30 value 5217.471809
## iter 40 value 5209.822838
## iter 50 value 5181.393693
## iter 60 value 5174.852310
## iter 70 value 5161.859776
## iter 80 value 5153.347679
## iter 90 value 5147.818682
## iter 100 value 5136.653708
## final value 5136.653708
## stopped after 100 iterations
## # weights: 551
## initial value 5621.403853
## iter 10 value 5438.756421
## iter 20 value 5272.625547
## iter 30 value 5262.158460
## iter 40 value 5221.067169
## iter 50 value 5175.806839
## iter 60 value 5136.086176
## iter 70 value 4916.776165
## iter 80 value 4735.000216
## iter 90 value 4645.270568
## iter 100 value 4552.562131
## final value 4552.562131
## stopped after 100 iterations
## # weights: 771
## initial value 6808.139907
## iter 10 value 5603.958554
## iter 20 value 5349.796568
## iter 30 value 5227.585991
## iter 40 value 5212.622023
## iter 50 value 5167.009083
## iter 60 value 5126.643094
## iter 70 value 5123.562355
## iter 80 value 5115.845076
## iter 90 value 5111.715972
## iter 100 value 5108.420798
## final value 5108.420798
## stopped after 100 iterations
## # weights: 221
## initial value 5727.481271
## iter 10 value 5486.641967
## iter 20 value 5380.007854
## iter 30 value 5351.967940
## iter 40 value 5344.417360
## iter 50 value 5337.963105
## iter 60 value 5325.910175
## iter 70 value 5263.441330
## iter 80 value 4940.044017
## iter 90 value 4624.173305
## iter 100 value 4501.426782
## final value 4501.426782
## stopped after 100 iterations
## # weights: 331
## initial value 5658.123695
## iter 10 value 5575.143020
## iter 20 value 5330.767170
## iter 30 value 5289.030212
## iter 40 value 5279.485170
## iter 50 value 5266.697315
## iter 60 value 5257.519032
## iter 70 value 5199.404764
## iter 80 value 5176.869658
## iter 90 value 5168.955273
## iter 100 value 5163.035956
## final value 5163.035956
## stopped after 100 iterations
## # weights: 551
## initial value 6480.357122
## iter 10 value 5384.060742
## iter 20 value 5290.723163
## iter 30 value 5264.752408
## iter 40 value 5214.533502
## iter 50 value 5171.361576
## iter 60 value 5149.678894
## iter 70 value 5113.912907
## iter 80 value 5101.028024
## iter 90 value 5092.209698
## iter 100 value 5028.116751
## final value 5028.116751
## stopped after 100 iterations
## # weights: 771
## initial value 7215.041980
## iter 10 value 5612.445716
## iter 20 value 5545.959869
## iter 30 value 5337.472064
## iter 40 value 5318.278993
## iter 50 value 5311.504748
## iter 60 value 5299.014522
## iter 70 value 5223.165705
## iter 80 value 5216.621155
## iter 90 value 5177.537643
## iter 100 value 5107.760574
## final value 5107.760574
## stopped after 100 iterations
## # weights: 221
## initial value 6919.487025
## iter 10 value 5586.628261
## iter 20 value 5274.810689
## iter 30 value 5069.851829
## iter 40 value 4875.523664
## iter 50 value 4632.917259
## iter 60 value 4583.310314
## iter 70 value 4444.477266
## iter 80 value 4368.095634
## iter 90 value 4312.518980
## iter 100 value 4280.243676
## final value 4280.243676
## stopped after 100 iterations
## # weights: 331
## initial value 6108.023993
## iter 10 value 5425.942831
## iter 20 value 5339.370659
## iter 30 value 5336.241449
## iter 40 value 5331.598715
## iter 50 value 5323.451570
## iter 60 value 5319.572255
## iter 70 value 5318.958319
## iter 80 value 5306.194456
## iter 90 value 5223.854593
## iter 100 value 5160.880142
## final value 5160.880142
## stopped after 100 iterations
## # weights: 551
## initial value 5652.322395
## iter 10 value 5493.042972
## iter 20 value 5355.461727
## iter 30 value 5250.919931
## iter 40 value 5198.974734
## iter 50 value 5172.547671
## iter 60 value 5161.393190
## iter 70 value 5122.575687
## iter 80 value 4889.705205
## iter 90 value 4635.675146
## iter 100 value 4489.481150
## final value 4489.481150
## stopped after 100 iterations
## # weights: 771
## initial value 5860.548042
## iter 10 value 5537.338574
## iter 20 value 5370.058976
## iter 30 value 5297.878408
## iter 40 value 5223.657686
## iter 50 value 5187.272019
## iter 60 value 5164.584213
## iter 70 value 4811.661745
## iter 80 value 4675.543024
## iter 90 value 4575.921934
## iter 100 value 4515.676304
## final value 4515.676304
## stopped after 100 iterations
## # weights: 221
## initial value 5651.731634
## iter 10 value 5608.235414
## iter 10 value 5608.235410
## iter 10 value 5608.235408
## final value 5608.235408
## converged
## # weights: 331
## initial value 5695.125048
## iter 10 value 5575.871931
## iter 20 value 5574.114115
## iter 30 value 5570.837039
## iter 40 value 5569.530523
## iter 50 value 5431.361527
## iter 60 value 5345.820210
## iter 70 value 5342.799769
## iter 80 value 5340.003955
## iter 90 value 5328.830207
## iter 100 value 5238.856658
## final value 5238.856658
## stopped after 100 iterations
## # weights: 551
## initial value 6561.083244
## iter 10 value 5611.774392
## iter 20 value 5346.302182
## iter 30 value 5336.068878
## iter 40 value 5333.848430
## iter 50 value 5318.823588
## iter 60 value 5309.810633
## iter 70 value 5290.808997
## iter 80 value 5278.920705
## iter 90 value 5182.860132
## iter 100 value 4943.133823
## final value 4943.133823
## stopped after 100 iterations
## # weights: 771
## initial value 5682.464927
## iter 10 value 5496.623693
## iter 20 value 5374.316492
## iter 30 value 5219.683456
## iter 40 value 5211.331569
## iter 50 value 5179.679189
## iter 60 value 5170.725766
## iter 70 value 5160.559853
## iter 80 value 5147.665777
## iter 90 value 5071.454125
## iter 100 value 5047.611715
## final value 5047.611715
## stopped after 100 iterations
## # weights: 221
## initial value 5831.880290
## iter 10 value 5380.277809
## iter 20 value 5317.874703
## iter 30 value 5198.232111
## iter 40 value 5128.522143
## iter 50 value 4977.813840
## iter 60 value 4774.380041
## iter 70 value 4587.656551
## iter 80 value 4480.854345
## iter 90 value 4390.998143
## iter 100 value 4327.113953
## final value 4327.113953
## stopped after 100 iterations
## # weights: 331
## initial value 5717.759937
## iter 10 value 5607.732799
## final value 5607.526605
## converged
## # weights: 551
## initial value 5887.820435
## iter 10 value 5517.994112
## iter 20 value 5289.746428
## iter 30 value 5242.019551
## iter 40 value 5141.592401
## iter 50 value 4752.297362
## iter 60 value 4604.780857
## iter 70 value 4432.408981
## iter 80 value 4396.898190
## iter 90 value 4359.971549
## iter 100 value 4344.451765
## final value 4344.451765
## stopped after 100 iterations
## # weights: 771
## initial value 6085.347488
## iter 10 value 5593.712643
## iter 20 value 5352.628883
## iter 30 value 5345.612490
## iter 40 value 5342.245125
## iter 50 value 5340.376507
## iter 60 value 5334.729747
## iter 70 value 5333.636490
## iter 80 value 5333.020678
## iter 90 value 5332.805521
## iter 100 value 5267.104587
## final value 5267.104587
## stopped after 100 iterations
## # weights: 221
## initial value 8447.227381
## iter 10 value 8021.852433
## iter 20 value 8003.153719
## iter 30 value 8000.769078
## iter 40 value 7994.649780
## iter 50 value 7958.350979
## iter 60 value 7898.325809
## iter 70 value 7753.928654
## iter 80 value 7592.614818
## iter 90 value 7032.877260
## iter 100 value 6735.255207
## final value 6735.255207
## stopped after 100 iterations
# Print the fitted node-2 model: resampling results per tuning combination
# and the size/decay values that were selected.
Adult_TDA_PC_5.50.5_n2_NN1Fit0
## Neural Network
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8137, 8137, 8138
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.6839289 0.37716216
## 2 0.5 0.5560371 0.03764186
## 2 0.7 0.7143212 0.43025567
## 3 0.3 0.6735136 0.36276157
## 3 0.5 0.5652119 0.15363819
## 3 0.7 0.6114971 0.20945174
## 5 0.3 0.6137229 0.21234778
## 5 0.5 0.6994074 0.39635213
## 5 0.7 0.6370702 0.28692174
## 7 0.3 0.6772110 0.34572762
## 7 0.5 0.5956091 0.21243700
## 7 0.7 0.5878243 0.20003391
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold Accuracy and Kappa stored on the node-2 fit.
Adult_TDA_PC_5.50.5_n2_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7192724 0.4370092 Fold3
## 2 0.7151634 0.4368938 Fold2
## 3 0.7085279 0.4168640 Fold1
# Keep the per-fold accuracies as a one-column data frame for the fold-wise
# comparison further down. The column is selected by name instead of by
# position so the extraction does not depend on the column order of `resample`.
ad_tda_pc_5.50.5_n2_nn1_fit_re <- Adult_TDA_PC_5.50.5_n2_NN1Fit0$resample["Accuracy"]
# Print the weights of the selected network.
summary(Adult_TDA_PC_5.50.5_n2_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.60 0.00 0.19 -0.31 1.20 0.00 0.26 -0.09
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.05 -0.74 0.03 0.00 -0.54 0.38 0.87 -0.62
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## -1.62 -0.10 -0.17 0.73 2.34 -0.14 -3.31 2.49
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -0.95 0.00 -0.12 1.34 -0.39 -1.09 -0.20 1.57
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.13 1.43 -0.25 -0.73 0.19 0.63 -0.05 0.07
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.86 1.78 0.71 -0.07 0.67 0.03 -1.14 -0.14
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.41 -0.42 -0.40 3.57 0.24 -0.37 -0.61 -0.91
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## -1.31 0.24 -0.06 0.08 0.15 0.19 -1.34 1.93
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 -0.03 0.29 -0.13 -0.49 0.41 0.54
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## -0.05 0.22 0.12 0.17 -0.57 -0.05 -0.66 0.38
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.08 0.48 0.00 -0.01 -0.34 -0.05 0.48 0.57
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## -0.18 -0.61 0.42 0.14 -0.05 0.21 0.58 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.06 -0.89 -0.19 0.00 -0.07 -0.28 0.46 -0.47
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.06 -0.08 -0.63 0.94 0.03
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.28 0.07 0.26 1.06 0.58 0.00 1.19 0.39
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -2.88 -0.85 -0.02 0.00 -1.21 -1.70 -0.26 0.21
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.97 1.20 -1.54 0.83 0.45 1.38 -1.08 -0.36
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.06 0.00 -0.10 0.87 -0.46 -0.12 0.15 -0.59
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.03 0.84 0.21 -0.74 0.26 1.47 -0.07 -0.48
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 1.16 -3.41 -0.27 0.01 0.28 0.05 -0.09 0.99
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.26 0.85 -1.30 -0.73 0.04 -0.19 -0.10 0.01
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.69 -0.11 -0.22 0.80 -0.15 -0.61 0.84 -1.13
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.01 0.08 -0.25 0.07 0.14 -0.08 -0.27
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.49 -0.19 -0.13 0.07 0.20 0.14 -0.35 0.03
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 -0.06 0.00 0.00 0.04 -0.11 -0.56 0.15
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.05 0.05 -0.09 0.00 0.02 0.23 -0.23 -0.01
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.01 0.34 -0.20 0.03 -0.08 0.07 -0.06 -0.07
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.09 -0.15 0.20 0.17
## b->o h1->o h2->o
## 1.46 -3.05 1.48
# Variable-importance plot limited to 25 features.
vip(Adult_TDA_PC_5.50.5_n2_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n2_NN1Fit TDA-Assisted NN")

# Predict the test set with the node-2 model.
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NN1Fit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of the predictions against the test labels.
# NOTE(review): the recorded test accuracy (0.386) is far below both the
# cross-validated accuracy (~0.71) and the no-information rate (0.759);
# check that the test columns/encoding match the node-2 training data.
ad_tda_pc_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2071 656
## >50K 5345 1696
##
## Accuracy : 0.3856
## 95% CI : (0.376, 0.3954)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 2e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2793
## Specificity : 0.7211
## Pos Pred Value : 0.7594
## Neg Pred Value : 0.2409
## Prevalence : 0.7592
## Detection Rate : 0.2120
## Detection Prevalence : 0.2792
## Balanced Accuracy : 0.5002
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the output is
# identical to the print directly above, so this repeat can probably be dropped.
ad_tda_pc_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2071 656
## >50K 5345 1696
##
## Accuracy : 0.3856
## 95% CI : (0.376, 0.3954)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 2e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2793
## Specificity : 0.7211
## Pos Pred Value : 0.7594
## Neg Pred Value : 0.2409
## Prevalence : 0.7592
## Detection Rate : 0.2120
## Detection Prevalence : 0.2792
## Balanced Accuracy : 0.5002
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy interval, p-values).
ad_tda_pc_5.50.5_n2_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.3856470106 0.0002079483 0.3759784054 0.3953838481 0.7592137592
## AccuracyPValue McnemarPValue
## 1.0000000000 0.0000000000
# Store the test-set accuracy; picked by name rather than by position so the
# value does not depend on the ordering of the `overall` vector.
ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc <- ad_tda_pc_5.50.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_pc_5.50.5_n2_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2792611 0.7210884 0.7594426
## Neg Pred Value Precision Recall
## 0.2408749 0.7594426 0.2792611
## F1 Prevalence Detection Rate
## 0.4083604 0.7592138 0.2120188
## Detection Prevalence Balanced Accuracy
## 0.2791769 0.5001747
# Keep Precision, Recall and F1 (selected by name instead of positions 5:7).
ad_tda_pc_5.50.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Row-wise difference of the two one-column accuracy data frames.
# NOTE(review): the subtraction pairs rows by position, and the resample table
# of this fit is listed as Fold3, Fold2, Fold1 — confirm `ad_nn1_fit_re` uses
# the same row order before reading these as per-fold differences.
diff_tda_pca_5.50.5_nn1_n2_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.50.5_n2_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n2_3_fold
## Accuracy
## 1 0.10936675
## 2 0.13109167
## 3 0.09839498
## Bayesian tests on the 3-fold differences
# Bayesian sign test; the last two arguments are the limits -0.01 and 0.01.
bst_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold differences and limits.
bsr_tda_pca_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0089
##
## $winRight
## [1] 0.9911
# Bayesian Correlated Test
# Arguments after the difference matrix: 0.1, then the limits -0.01 and 0.01.
# NOTE(review): if the 0.1 is the between-fold correlation (rho), 3-fold
# cross-validation would normally use 1/3 (the test fraction); 0.1 corresponds
# to 10-fold. Confirm against the helper's definition before relying on the
# probabilities printed below.
bct_tda_pca_5.50.5_nn1.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.004021497
##
## $rope
## [1] 0.001685006
##
## $right
## [1] 0.9942935
# ROPE plot of the fold differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.50.5_nn1_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
#bf_tda_pca_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
#bf_tda_pca_5.50.5_nn1.n2_3_fold
# Classical one-sample t-test of the fold differences against zero. The
# argument expression is kept inline because it is echoed in the "data:" line.
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold)
## t = 11.757, df = 2, p-value = 0.007157
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.07161399 0.15428828
## sample estimates:
## mean of x
## 0.1129511
### Test set diff
# Test-set accuracy gap: non-TDA-assisted NN minus the node-2 TDA-assisted NN.
# NOTE(review): this is a single number (see the printed value), so every
# test below runs on one observation; the correlated test prints NA for all
# three regions, presumably because one value has no sample variance.
diff_tda_pca_5.50.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n2_test
## Accuracy
## 0.4182023
## Bayesian Tests Test set diff
# Bayesian Sign Test (-0.01 and 0.01 presumably bound the ROPE -- confirm)
bst_tda_pca_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: left probability divided by right probability
bst_tda_pca_5.50.5_nn1.n2_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n2_test$probLeft /
  bst_tda_pca_5.50.5_nn1.n2_test$probRight
bst_tda_pca_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test (same bounds as the sign test)
bsr_tda_pca_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1632
##
## $winRight
## [1] 0.8368
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.50.5_nn1.n2_test))
#BayesFactor (disabled)
#bf_tda_pca_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n2_test)) #bf_tda_pca_5.50.5_nn1.n2_test
#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node3
#Neural Network 1
# Fit an "nnet" model on the node-3 data, tuned over nn1Grid with the
# resampling scheme in fitControl; the best tuning row is chosen by accuracy.
Adult_TDA_PC_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  # TRUE rather than T: T is an ordinary variable that can be reassigned,
  # whereas TRUE is a reserved word.
  # NOTE(review): `Importance` is forwarded through `...` to the model fit;
  # it does not look like an nnet argument -- confirm it has any effect.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4879.915615
## iter 10 value 4743.347164
## iter 20 value 4588.292117
## iter 30 value 4439.052935
## iter 40 value 4390.360078
## iter 50 value 4360.966943
## iter 60 value 4302.590832
## iter 70 value 4289.248075
## iter 80 value 4276.601422
## iter 90 value 4243.584358
## iter 100 value 4178.327807
## final value 4178.327807
## stopped after 100 iterations
## # weights: 331
## initial value 5717.671774
## iter 10 value 4590.052283
## iter 20 value 4558.250512
## iter 30 value 4557.583672
## iter 40 value 4440.789131
## iter 50 value 4395.767751
## iter 60 value 4314.179807
## iter 70 value 4300.861221
## iter 80 value 4298.080517
## iter 90 value 4295.598851
## iter 100 value 4256.146925
## final value 4256.146925
## stopped after 100 iterations
## # weights: 551
## initial value 6908.583838
## iter 10 value 4623.386189
## iter 20 value 4552.732983
## iter 30 value 4548.631516
## iter 40 value 4534.837916
## iter 50 value 4525.985493
## iter 60 value 4304.967451
## iter 70 value 4131.243820
## iter 80 value 4047.295098
## iter 90 value 4037.157618
## iter 100 value 4034.897931
## final value 4034.897931
## stopped after 100 iterations
## # weights: 771
## initial value 5539.328058
## iter 10 value 4559.327449
## iter 20 value 4461.207877
## iter 30 value 4455.104145
## iter 40 value 4329.993603
## iter 50 value 4286.033367
## iter 60 value 4221.404478
## iter 70 value 4099.533756
## iter 80 value 4028.892150
## iter 90 value 4008.616142
## iter 100 value 3952.765178
## final value 3952.765178
## stopped after 100 iterations
## # weights: 221
## initial value 6434.195777
## iter 10 value 4746.054348
## iter 20 value 4691.708094
## iter 30 value 4446.109012
## iter 40 value 4412.413185
## iter 50 value 4329.976016
## iter 60 value 4289.483677
## iter 70 value 4250.808631
## iter 80 value 4239.974246
## iter 90 value 4142.767420
## iter 100 value 4031.674428
## final value 4031.674428
## stopped after 100 iterations
## # weights: 331
## initial value 6491.120192
## iter 10 value 4478.544448
## iter 20 value 4448.093289
## iter 30 value 4445.296967
## iter 40 value 4419.316281
## iter 50 value 4369.235239
## iter 60 value 4309.403417
## iter 70 value 4031.950589
## iter 80 value 3754.356687
## iter 90 value 3594.700647
## iter 100 value 3538.361437
## final value 3538.361437
## stopped after 100 iterations
## # weights: 551
## initial value 7331.608628
## iter 10 value 4572.231963
## iter 20 value 4464.311572
## iter 30 value 4457.357831
## iter 40 value 4441.822628
## iter 50 value 4376.180568
## iter 60 value 4258.583702
## iter 70 value 4079.934709
## iter 80 value 3939.805425
## iter 90 value 3924.361920
## iter 100 value 3836.908717
## final value 3836.908717
## stopped after 100 iterations
## # weights: 771
## initial value 8183.196746
## iter 10 value 4759.170167
## iter 20 value 4743.734679
## iter 30 value 4575.282007
## iter 40 value 4437.663928
## iter 50 value 4346.923774
## iter 60 value 4336.171450
## iter 70 value 4314.683624
## iter 80 value 4296.495611
## iter 90 value 4271.573084
## iter 100 value 4178.279287
## final value 4178.279287
## stopped after 100 iterations
## # weights: 221
## initial value 5008.681822
## iter 10 value 4741.239082
## iter 20 value 4539.380981
## iter 30 value 4492.109512
## iter 40 value 4463.855871
## iter 50 value 4400.378848
## iter 60 value 4162.434503
## iter 70 value 3909.766774
## iter 80 value 3895.525762
## iter 90 value 3821.167083
## iter 100 value 3755.331587
## final value 3755.331587
## stopped after 100 iterations
## # weights: 331
## initial value 4792.655014
## iter 10 value 4744.797266
## iter 20 value 4743.583177
## iter 30 value 4702.715292
## iter 40 value 4674.294803
## iter 50 value 4449.610920
## iter 60 value 4446.122763
## iter 70 value 4432.072984
## iter 80 value 4429.208847
## iter 90 value 4418.239432
## iter 100 value 4408.310485
## final value 4408.310485
## stopped after 100 iterations
## # weights: 551
## initial value 7736.057375
## iter 10 value 4728.831203
## iter 20 value 4455.716820
## iter 30 value 4455.245690
## iter 40 value 4450.498672
## iter 50 value 4449.124462
## iter 60 value 4434.287770
## iter 70 value 4410.628138
## iter 80 value 4367.882895
## iter 90 value 4339.891879
## iter 100 value 4326.651892
## final value 4326.651892
## stopped after 100 iterations
## # weights: 771
## initial value 6634.381569
## iter 10 value 4645.650203
## iter 20 value 4461.200724
## iter 30 value 4456.385584
## iter 40 value 4453.756836
## iter 50 value 4451.989923
## iter 60 value 4448.961069
## iter 70 value 4439.774996
## iter 80 value 4391.684036
## iter 90 value 4329.771330
## iter 100 value 4303.631277
## final value 4303.631277
## stopped after 100 iterations
## # weights: 221
## initial value 7436.966133
## iter 10 value 4744.831272
## iter 20 value 4689.467117
## iter 30 value 4474.327974
## iter 40 value 4451.982367
## iter 50 value 4390.194016
## iter 60 value 4281.047730
## iter 70 value 4252.474211
## iter 80 value 4227.933641
## iter 90 value 4180.093085
## iter 100 value 3960.693394
## final value 3960.693394
## stopped after 100 iterations
## # weights: 331
## initial value 7031.593739
## iter 10 value 4744.285853
## iter 10 value 4744.285852
## iter 10 value 4744.285819
## final value 4744.285819
## converged
## # weights: 551
## initial value 6049.686714
## iter 10 value 4681.663691
## iter 20 value 4469.216910
## iter 30 value 4458.424468
## iter 40 value 4448.614478
## iter 50 value 4419.952357
## iter 60 value 4372.360183
## iter 70 value 4287.411331
## iter 80 value 4271.079840
## iter 90 value 4220.646255
## iter 100 value 4173.478094
## final value 4173.478094
## stopped after 100 iterations
## # weights: 771
## initial value 4841.193604
## iter 10 value 4728.354369
## iter 20 value 4710.279648
## iter 30 value 4548.352445
## iter 40 value 4519.495354
## iter 50 value 4481.979282
## iter 60 value 4458.186740
## iter 70 value 4449.000353
## iter 80 value 4447.704714
## iter 90 value 4445.253038
## iter 100 value 4437.621400
## final value 4437.621400
## stopped after 100 iterations
## # weights: 221
## initial value 6137.545859
## iter 10 value 4567.823244
## iter 20 value 4479.361485
## iter 30 value 4423.109661
## iter 40 value 4356.048283
## iter 50 value 4335.289193
## iter 60 value 4306.818330
## iter 70 value 4168.494715
## iter 80 value 3889.109872
## iter 90 value 3766.631692
## iter 100 value 3700.522889
## final value 3700.522889
## stopped after 100 iterations
## # weights: 331
## initial value 4831.734014
## iter 10 value 4723.220007
## iter 20 value 4488.363088
## iter 30 value 4469.012550
## iter 40 value 4464.506157
## iter 50 value 4417.764084
## iter 60 value 4345.181105
## iter 70 value 4312.060368
## iter 80 value 4277.271628
## iter 90 value 4068.703403
## iter 100 value 3837.796114
## final value 3837.796114
## stopped after 100 iterations
## # weights: 551
## initial value 5755.839451
## iter 10 value 4753.967443
## iter 20 value 4744.476163
## iter 30 value 4744.352344
## iter 40 value 4645.335907
## iter 50 value 4498.963957
## iter 60 value 4473.437108
## iter 70 value 4463.980294
## iter 80 value 4463.474165
## iter 90 value 4461.894376
## iter 100 value 4450.810662
## final value 4450.810662
## stopped after 100 iterations
## # weights: 771
## initial value 6373.849548
## iter 10 value 4544.364124
## iter 20 value 4471.579917
## iter 30 value 4458.884095
## iter 40 value 4328.601499
## iter 50 value 4130.865530
## iter 60 value 3909.542496
## iter 70 value 3899.689601
## iter 80 value 3843.685588
## iter 90 value 3805.279310
## iter 100 value 3700.999205
## final value 3700.999205
## stopped after 100 iterations
## # weights: 221
## initial value 8068.849973
## iter 10 value 4748.987219
## iter 20 value 4745.530792
## iter 30 value 4494.912395
## iter 40 value 4472.013977
## iter 50 value 4469.809937
## iter 60 value 4468.789707
## iter 70 value 4468.395799
## iter 80 value 4444.423215
## iter 90 value 4357.657434
## iter 100 value 4311.473092
## final value 4311.473092
## stopped after 100 iterations
## # weights: 331
## initial value 5450.277968
## iter 10 value 4745.036759
## iter 20 value 4497.857511
## iter 30 value 4374.536919
## iter 40 value 4307.618419
## iter 50 value 4294.810206
## iter 60 value 4290.389422
## iter 70 value 4246.026689
## iter 80 value 4113.148611
## iter 90 value 4004.690343
## iter 100 value 3841.911512
## final value 3841.911512
## stopped after 100 iterations
## # weights: 551
## initial value 11939.710793
## iter 10 value 4680.636221
## iter 20 value 4503.417759
## iter 30 value 4484.635539
## iter 40 value 4465.318136
## iter 50 value 4457.616017
## iter 60 value 4447.658017
## iter 70 value 4428.807542
## iter 80 value 4426.728793
## iter 90 value 4413.652531
## iter 100 value 4385.226803
## final value 4385.226803
## stopped after 100 iterations
## # weights: 771
## initial value 4989.161230
## iter 10 value 4727.822330
## iter 20 value 4539.994177
## iter 30 value 4482.795088
## iter 40 value 4453.834663
## iter 50 value 4442.613906
## iter 60 value 4377.639225
## iter 70 value 4323.971250
## iter 80 value 4292.383568
## iter 90 value 4262.514885
## iter 100 value 4238.450647
## final value 4238.450647
## stopped after 100 iterations
## # weights: 221
## initial value 7331.816310
## iter 10 value 4745.613146
## final value 4745.612921
## converged
## # weights: 331
## initial value 5665.840522
## iter 10 value 4732.439749
## iter 20 value 4512.019409
## iter 30 value 4509.099750
## iter 40 value 4489.990579
## iter 50 value 4476.166595
## iter 60 value 4456.938080
## iter 70 value 4417.618511
## iter 80 value 4363.154306
## iter 90 value 4332.725834
## iter 100 value 4317.083782
## final value 4317.083782
## stopped after 100 iterations
## # weights: 551
## initial value 10467.928860
## iter 10 value 4599.168140
## iter 20 value 4475.051095
## iter 30 value 4411.696255
## iter 40 value 4343.278509
## iter 50 value 4321.189767
## iter 60 value 4253.504321
## iter 70 value 4071.288139
## iter 80 value 3947.796311
## iter 90 value 3847.784742
## iter 100 value 3827.730223
## final value 3827.730223
## stopped after 100 iterations
## # weights: 771
## initial value 5257.258948
## iter 10 value 4611.017021
## iter 20 value 4480.430603
## iter 30 value 4474.793860
## iter 40 value 4474.401904
## iter 50 value 4470.872492
## iter 60 value 4437.812658
## iter 70 value 4427.773266
## iter 80 value 4352.751790
## iter 90 value 4309.415379
## iter 100 value 4295.831276
## final value 4295.831276
## stopped after 100 iterations
## # weights: 221
## initial value 7411.124742
## iter 10 value 4746.017917
## iter 20 value 4736.058560
## iter 30 value 4560.094087
## iter 40 value 4558.597382
## iter 50 value 4536.435687
## iter 60 value 4474.196717
## iter 70 value 4374.279824
## iter 80 value 4113.403575
## iter 90 value 3887.797574
## iter 100 value 3775.064598
## final value 3775.064598
## stopped after 100 iterations
## # weights: 331
## initial value 7745.337927
## iter 10 value 4655.189861
## iter 20 value 4596.415782
## iter 30 value 4585.962774
## iter 40 value 4477.632489
## iter 50 value 4473.349437
## iter 60 value 4472.668920
## iter 70 value 4454.277655
## iter 80 value 4347.262236
## iter 90 value 4336.343713
## iter 100 value 4329.886671
## final value 4329.886671
## stopped after 100 iterations
## # weights: 551
## initial value 10712.356114
## iter 10 value 4720.113559
## iter 20 value 4485.480143
## iter 30 value 4445.977773
## iter 40 value 4356.359805
## iter 50 value 4348.665498
## iter 60 value 4326.496732
## iter 70 value 4316.908592
## iter 80 value 4294.388905
## iter 90 value 4282.552246
## iter 100 value 4184.130452
## final value 4184.130452
## stopped after 100 iterations
## # weights: 771
## initial value 5919.586744
## iter 10 value 4733.872901
## iter 20 value 4721.875091
## iter 30 value 4720.556517
## iter 40 value 4438.767930
## iter 50 value 4351.584999
## iter 60 value 4340.370827
## iter 70 value 4305.318070
## iter 80 value 4252.155216
## iter 90 value 4090.920726
## iter 100 value 3957.698884
## final value 3957.698884
## stopped after 100 iterations
## # weights: 221
## initial value 5680.128924
## iter 10 value 4687.704731
## iter 20 value 4679.926447
## iter 30 value 4488.636328
## iter 40 value 4482.256924
## iter 50 value 4371.359927
## iter 60 value 4316.649897
## iter 70 value 4263.712220
## iter 80 value 4143.074334
## iter 90 value 3931.106330
## iter 100 value 3743.190202
## final value 3743.190202
## stopped after 100 iterations
## # weights: 331
## initial value 5340.671866
## iter 10 value 4761.479020
## iter 20 value 4760.031882
## iter 30 value 4723.472530
## iter 40 value 4484.147742
## iter 50 value 4474.479709
## iter 60 value 4462.713069
## iter 70 value 4432.763097
## iter 80 value 4386.784563
## iter 90 value 4146.629942
## iter 100 value 4026.973361
## final value 4026.973361
## stopped after 100 iterations
## # weights: 551
## initial value 8655.370719
## iter 10 value 4590.504931
## iter 20 value 4461.383781
## iter 30 value 4391.633613
## iter 40 value 4343.700563
## iter 50 value 4333.282413
## iter 60 value 4315.624975
## iter 70 value 4292.438364
## iter 80 value 4111.641646
## iter 90 value 4007.774019
## iter 100 value 3823.758104
## final value 3823.758104
## stopped after 100 iterations
## # weights: 771
## initial value 7207.998132
## iter 10 value 4732.380372
## iter 20 value 4468.188797
## iter 30 value 4441.390307
## iter 40 value 4431.013729
## iter 50 value 4402.109351
## iter 60 value 4313.238271
## iter 70 value 4280.791742
## iter 80 value 4274.593098
## iter 90 value 4258.924194
## iter 100 value 4257.464761
## final value 4257.464761
## stopped after 100 iterations
## # weights: 221
## initial value 8784.072643
## iter 10 value 7096.514418
## iter 20 value 6795.849461
## iter 30 value 6711.162734
## iter 40 value 6631.299212
## iter 50 value 6543.484733
## iter 60 value 6516.740081
## iter 70 value 6457.366924
## iter 80 value 6438.240478
## iter 90 value 6400.558606
## iter 100 value 6170.352610
## final value 6170.352610
## stopped after 100 iterations
# Print the fitted model: resampled accuracy/kappa for every size/decay
# combination and the combination that was selected.
Adult_TDA_PC_5.50.5_n3_NN1Fit0
## Neural Network
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8826, 8827, 8827
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7969024 0.2236936
## 2 0.5 0.8268131 0.4269583
## 2 0.7 0.8200900 0.3710531
## 3 0.3 0.7953163 0.1636010
## 3 0.5 0.8210718 0.3751329
## 3 0.7 0.8086114 0.3342103
## 5 0.3 0.8190331 0.3438761
## 5 0.5 0.8094401 0.3009132
## 5 0.7 0.8080064 0.2821051
## 7 0.3 0.8094400 0.2810180
## 7 0.5 0.8134448 0.3158719
## 7 0.7 0.8052116 0.2263384
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold performance of the selected node-3 model.
Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8282348 0.4266051 Fold3
## 2 0.8305008 0.4637494 Fold2
## 3 0.8217037 0.3905204 Fold1
# Keep only the accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline network.
ad_tda_pc_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample["Accuracy"]
# Network architecture and fitted weights of the final model.
summary(Adult_TDA_PC_5.50.5_n3_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -1.29 0.00 0.52 1.50 -2.37 0.00 -0.89 0.40
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.60 0.18 -0.03 0.00 0.93 0.63 0.47 -1.16
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -1.34 -0.32 -0.32 -0.15 2.06 -2.01 2.00 -0.34
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -2.99 0.00 0.26 1.00 0.09 -0.21 0.01 -1.07
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.34 0.59 0.58 -0.85 0.52 3.53 -0.03 -0.21
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.05 -1.82 -0.05 0.31 -1.61 0.00 -0.80 -0.45
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 1.12 -0.21 -1.53 0.42 -0.26 0.76 -1.05 -0.50
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.66 -0.02 0.50 2.55 -1.14 -3.19 3.02 -4.31
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.08 0.80 0.06 0.32 0.40 -0.01
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.69 0.00 0.02 0.07 0.05 0.21 0.19 -0.86
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 -0.21 0.00 -0.21 -0.01 -0.01 0.08 -0.08
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.03 0.13 0.05 0.12 -0.08 -1.39 -0.15 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## -0.02 1.22 0.14 0.01 -0.41 0.34 -0.57 0.01
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.02 -0.22 -0.48 -0.17 0.00
## b->o h1->o h2->o
## -0.77 -0.75 2.55
# Variable-importance plot (25 features) for the node-3 TDA-assisted network.
# The title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_PC_5.50.5_n3_NN1Fit0, 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_NN1Fit TDA-Assisted NN")

# Score the held-out test data with the node-3 TDA-assisted network.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the true test labels in a confusion matrix.
ad_tda_pc_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4968 1693
## >50K 2448 659
##
## Accuracy : 0.5761
## 95% CI : (0.5662, 0.5859)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.045
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6699
## Specificity : 0.2802
## Pos Pred Value : 0.7458
## Neg Pred Value : 0.2121
## Prevalence : 0.7592
## Detection Rate : 0.5086
## Detection Prevalence : 0.6819
## Balanced Accuracy : 0.4750
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time (the
# output below repeats the previous printout); the line looks redundant.
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4968 1693
## >50K 2448 659
##
## Accuracy : 0.5761
## 95% CI : (0.5662, 0.5859)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.045
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6699
## Specificity : 0.2802
## Pos Pred Value : 0.7458
## Neg Pred Value : 0.2121
## Prevalence : 0.7592
## Detection Rate : 0.5086
## Detection Prevalence : 0.6819
## Balanced Accuracy : 0.4750
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-3 confusion matrix.
ad_tda_pc_5.50.5_n3_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.760647e-01 -4.498026e-02 5.661919e-01 5.858921e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.042241e-31
# Keep the overall accuracy (first entry above) for the test-set comparison.
ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the confusion matrix.
ad_tda_pc_5.50.5_n3_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.6699029 0.2801871 0.7458340
## Neg Pred Value Precision Recall
## 0.2121017 0.7458340 0.6699029
## F1 Prevalence Detection Rate
## 0.7058322 0.7592138 0.5085995
## Detection Prevalence Balanced Accuracy
## 0.6819206 0.4750450
# Keep precision, recall and F1 (entries 5 to 7 of the statistics above).
ad_tda_pc_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy gap: non-TDA-assisted NN minus the node-3 TDA-assisted NN.
# NOTE(review): the subtraction pairs rows by position, so it assumes both
# resample tables list the folds in the same order -- confirm.
diff_tda_pca_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.50.5_n3_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n3_3_fold
## Accuracy
## 1 0.0004043546
## 2 0.0157543076
## 3 -0.0147807957
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (-0.01 and 0.01 presumably bound the ROPE -- confirm)
bst_tda_pca_5.50.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: left probability divided by right probability
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n3_3_fold$probLeft /
  bst_tda_pca_5.50.5_nn1.n3_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test (same bounds as the sign test)
bsr_tda_pca_5.50.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.0501
##
## $winRope
## [1] 0.9011
##
## $winRight
## [1] 0.0488
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the assumed correlation between folds --
# confirm against the definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.2060871
##
## $rope
## [1] 0.5701477
##
## $right
## [1] 0.2237652
# Rope Plot
plot(
  rope(diff_tda_pca_5.50.5_nn1_n3_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
#bf_tda_pca_5.50.5_nn1.n3_3_fold
#t_test
# One-sample t-test of the three fold-wise differences against zero.
t.test(
  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
## t = 0.052104, df = 2, p-value = 0.9632
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03746760 0.03838617
## sample estimates:
## mean of x
## 0.0004592888
### Test set diff
# Test-set accuracy gap: non-TDA-assisted NN minus the node-3 TDA-assisted NN.
# NOTE(review): this is a single number (see the printed value), so every
# test below runs on one observation; the correlated test prints NA for all
# three regions, presumably because one value has no sample variance.
diff_tda_pca_5.50.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n3_test
## Accuracy
## 0.2277846
## Bayesian Tests Test set diff
# Bayesian Sign Test (-0.01 and 0.01 presumably bound the ROPE -- confirm)
bst_tda_pca_5.50.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: left probability divided by right probability
bst_tda_pca_5.50.5_nn1.n3_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n3_test$probLeft /
  bst_tda_pca_5.50.5_nn1.n3_test$probRight
bst_tda_pca_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test (same bounds as the sign test)
bsr_tda_pca_5.50.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1562
##
## $winRight
## [1] 0.8438
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.50.5_nn1.n3_test))
#BayesFactor (disabled)
#bf_tda_pca_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n3_test)) #bf_tda_pca_5.50.5_nn1.n3_test
#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))
##Node4
#Neural Network 1
# Fit an "nnet" model on the node-4 data, tuned over nn1Grid with the
# resampling scheme in fitControl; the best tuning row is chosen by accuracy.
Adult_TDA_PC_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  # TRUE rather than T: T is an ordinary variable that can be reassigned,
  # whereas TRUE is a reserved word.
  # NOTE(review): `Importance` is forwarded through `...` to the model fit;
  # it does not look like an nnet argument -- confirm it has any effect.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 6738.487163
## iter 10 value 2374.533129
## iter 20 value 2374.292669
## iter 30 value 2362.497825
## iter 40 value 2207.317976
## iter 50 value 2111.772312
## iter 60 value 1830.003863
## iter 70 value 1745.573034
## iter 80 value 1726.343405
## iter 90 value 1711.031654
## iter 100 value 1698.981046
## final value 1698.981046
## stopped after 100 iterations
## # weights: 331
## initial value 8011.925037
## iter 10 value 2425.677034
## iter 20 value 2374.835024
## iter 30 value 2362.283702
## iter 40 value 2192.457711
## iter 50 value 2192.345594
## iter 60 value 2170.945545
## iter 70 value 2170.604943
## iter 80 value 2167.434702
## iter 90 value 2163.938948
## iter 100 value 2162.883474
## final value 2162.883474
## stopped after 100 iterations
## # weights: 551
## initial value 2942.109143
## iter 10 value 2374.178053
## iter 20 value 2373.737856
## iter 30 value 2361.645292
## iter 40 value 2189.053824
## iter 50 value 2180.884737
## iter 60 value 2176.638745
## iter 70 value 2168.115358
## iter 80 value 2167.842060
## iter 90 value 2161.836356
## iter 100 value 2161.225501
## final value 2161.225501
## stopped after 100 iterations
## # weights: 771
## initial value 13472.895242
## iter 10 value 2376.817406
## iter 20 value 2308.538686
## iter 30 value 2208.909928
## iter 40 value 2140.301080
## iter 50 value 2050.709301
## iter 60 value 1903.206786
## iter 70 value 1799.470462
## iter 80 value 1765.464863
## iter 90 value 1762.239936
## iter 100 value 1760.628122
## final value 1760.628122
## stopped after 100 iterations
## # weights: 221
## initial value 8191.353439
## iter 10 value 2227.073953
## iter 20 value 2117.343356
## iter 30 value 2090.365015
## iter 40 value 2055.522220
## iter 50 value 2019.286060
## iter 60 value 1900.210167
## iter 70 value 1807.355055
## iter 80 value 1766.385683
## iter 90 value 1761.642848
## iter 100 value 1760.165800
## final value 1760.165800
## stopped after 100 iterations
## # weights: 331
## initial value 10343.691776
## iter 10 value 2361.644320
## iter 20 value 2224.276041
## iter 30 value 2216.948512
## iter 40 value 2176.139622
## iter 50 value 2173.930374
## iter 60 value 2173.471800
## iter 70 value 2169.302616
## iter 80 value 2167.310281
## iter 90 value 2165.999789
## iter 100 value 2165.854397
## final value 2165.854397
## stopped after 100 iterations
## # weights: 551
## initial value 10260.882822
## iter 10 value 2160.800725
## iter 20 value 1755.586999
## iter 30 value 1724.609303
## iter 40 value 1715.382323
## iter 50 value 1712.474379
## iter 60 value 1685.868905
## iter 70 value 1677.087277
## iter 80 value 1675.197462
## iter 90 value 1671.592911
## iter 100 value 1667.695520
## final value 1667.695520
## stopped after 100 iterations
## # weights: 771
## initial value 7930.190466
## iter 10 value 2333.356739
## iter 20 value 2182.373379
## iter 30 value 2171.133683
## iter 40 value 2168.388278
## iter 50 value 2150.304770
## iter 60 value 2134.446934
## iter 70 value 2085.414051
## iter 80 value 2080.652053
## iter 90 value 2066.546534
## iter 100 value 2056.642515
## final value 2056.642515
## stopped after 100 iterations
## # weights: 221
## initial value 9182.616591
## iter 10 value 2378.393845
## iter 20 value 2172.136281
## iter 30 value 2138.617650
## iter 40 value 2099.234439
## iter 50 value 2082.480958
## iter 60 value 2074.195096
## iter 70 value 2062.184417
## iter 80 value 2049.995213
## iter 90 value 2042.512112
## iter 100 value 2041.725829
## final value 2041.725829
## stopped after 100 iterations
## # weights: 331
## initial value 11730.626548
## iter 10 value 2381.387869
## iter 20 value 2346.534521
## iter 30 value 2299.140390
## iter 40 value 2161.321798
## iter 50 value 2068.821920
## iter 60 value 2039.418910
## iter 70 value 1961.519084
## iter 80 value 1931.128656
## iter 90 value 1894.400233
## iter 100 value 1826.461889
## final value 1826.461889
## stopped after 100 iterations
## # weights: 551
## initial value 12558.503941
## iter 10 value 2907.245167
## iter 20 value 2206.268137
## iter 30 value 2184.655170
## iter 40 value 2048.686307
## iter 50 value 1808.871462
## iter 60 value 1757.698683
## iter 70 value 1722.727646
## iter 80 value 1690.013854
## iter 90 value 1669.029730
## iter 100 value 1660.881366
## final value 1660.881366
## stopped after 100 iterations
## # weights: 771
## initial value 4337.705135
## iter 10 value 2406.358044
## iter 20 value 2314.681386
## iter 30 value 2240.475579
## iter 40 value 2190.706965
## iter 50 value 2156.777411
## iter 60 value 2153.787340
## iter 70 value 2151.175818
## iter 80 value 2147.510879
## iter 90 value 2146.825099
## final value 2146.786781
## converged
## # weights: 221
## initial value 12276.631257
## iter 10 value 2392.900102
## iter 20 value 2121.590158
## iter 30 value 1856.046854
## iter 40 value 1787.541948
## iter 50 value 1738.021230
## iter 60 value 1717.060730
## iter 70 value 1701.277993
## iter 80 value 1695.022920
## iter 90 value 1688.591486
## iter 100 value 1688.297192
## final value 1688.297192
## stopped after 100 iterations
## # weights: 331
## initial value 5939.917236
## iter 10 value 2376.493697
## iter 20 value 2243.766809
## iter 30 value 2149.403369
## iter 40 value 2102.066549
## iter 50 value 2092.980023
## iter 60 value 2079.641890
## iter 70 value 2019.992714
## iter 80 value 1996.567180
## iter 90 value 1839.747107
## iter 100 value 1698.568688
## final value 1698.568688
## stopped after 100 iterations
## # weights: 551
## initial value 11958.743601
## iter 10 value 2507.418861
## iter 20 value 2291.993389
## iter 30 value 2059.185754
## iter 40 value 1973.001425
## iter 50 value 1805.466056
## iter 60 value 1728.006545
## iter 70 value 1723.705590
## iter 80 value 1674.785841
## iter 90 value 1661.808090
## iter 100 value 1657.136902
## final value 1657.136902
## stopped after 100 iterations
## # weights: 771
## initial value 4838.008939
## iter 10 value 2279.069710
## iter 20 value 2108.880432
## iter 30 value 2056.115417
## iter 40 value 2053.961660
## iter 50 value 2046.357970
## iter 60 value 2031.464106
## iter 70 value 1967.196716
## iter 80 value 1922.851556
## iter 90 value 1902.022519
## iter 100 value 1826.472241
## final value 1826.472241
## stopped after 100 iterations
## # weights: 221
## initial value 7992.151113
## iter 10 value 2353.237620
## iter 20 value 2348.140621
## iter 30 value 2348.058850
## iter 40 value 2156.803174
## iter 50 value 2153.426031
## iter 60 value 2153.360807
## iter 70 value 2152.998465
## iter 80 value 2150.910018
## iter 90 value 2126.461344
## iter 100 value 2112.012671
## final value 2112.012671
## stopped after 100 iterations
## # weights: 331
## initial value 8869.793580
## iter 10 value 2237.773111
## iter 20 value 2190.033065
## iter 30 value 2189.674336
## iter 40 value 2170.279677
## iter 50 value 2169.368497
## iter 60 value 2168.702205
## iter 70 value 2161.482484
## iter 80 value 2091.221658
## iter 90 value 1989.536404
## iter 100 value 1911.175278
## final value 1911.175278
## stopped after 100 iterations
## # weights: 551
## initial value 5825.468117
## iter 10 value 2258.867594
## iter 20 value 2179.930119
## iter 30 value 2165.219295
## iter 40 value 2160.543659
## iter 50 value 2151.780699
## iter 60 value 2142.032684
## iter 70 value 2124.867017
## iter 80 value 2122.398257
## iter 90 value 2060.497685
## iter 100 value 1990.404053
## final value 1990.404053
## stopped after 100 iterations
## # weights: 771
## initial value 5127.189956
## iter 10 value 2366.712027
## iter 20 value 2215.023526
## iter 30 value 2134.784981
## iter 40 value 2099.275743
## iter 50 value 2079.053045
## iter 60 value 2059.680987
## iter 70 value 2055.748604
## iter 80 value 2051.597445
## iter 90 value 2047.046401
## iter 100 value 2036.947990
## final value 2036.947990
## stopped after 100 iterations
## # weights: 221
## initial value 8647.308450
## iter 10 value 2381.509739
## iter 20 value 2190.187893
## iter 30 value 2162.938974
## iter 40 value 2096.036619
## iter 50 value 2077.219433
## iter 60 value 2072.528938
## iter 70 value 2044.762863
## iter 80 value 2028.994327
## iter 90 value 2010.701103
## iter 100 value 1958.831118
## final value 1958.831118
## stopped after 100 iterations
## # weights: 331
## initial value 7021.401500
## iter 10 value 2380.297391
## iter 20 value 2378.815358
## iter 30 value 2378.798285
## iter 40 value 2377.355176
## iter 50 value 2282.357048
## iter 60 value 2160.217272
## iter 70 value 2159.088910
## iter 80 value 2147.956988
## iter 90 value 2145.271867
## iter 100 value 2141.011054
## final value 2141.011054
## stopped after 100 iterations
## # weights: 551
## initial value 6009.537043
## iter 10 value 2380.232231
## iter 20 value 2154.965651
## iter 30 value 2091.348607
## iter 40 value 2089.938056
## iter 50 value 2089.577205
## iter 60 value 2068.335716
## iter 70 value 2035.177879
## iter 80 value 1885.472038
## iter 90 value 1779.177357
## iter 100 value 1739.264210
## final value 1739.264210
## stopped after 100 iterations
## # weights: 771
## initial value 4420.451110
## iter 10 value 2379.495624
## iter 20 value 2377.587726
## iter 30 value 2377.553924
## iter 40 value 2181.655375
## iter 50 value 2163.447447
## iter 60 value 2163.241251
## iter 70 value 2162.845528
## iter 80 value 2161.510684
## final value 2160.730993
## converged
## # weights: 221
## initial value 5370.549674
## iter 10 value 2374.405073
## iter 20 value 2374.291261
## iter 30 value 2369.348823
## iter 40 value 2368.900626
## iter 50 value 2115.523039
## iter 60 value 2092.214751
## iter 70 value 2091.527568
## final value 2091.509864
## converged
## # weights: 331
## initial value 4044.925745
## iter 10 value 2374.546648
## iter 20 value 2373.893540
## iter 30 value 2373.885645
## iter 40 value 2363.103969
## iter 50 value 2362.780949
## iter 60 value 2362.664889
## iter 70 value 2361.970499
## iter 80 value 2360.684571
## iter 90 value 2342.260617
## iter 100 value 2239.066137
## final value 2239.066137
## stopped after 100 iterations
## # weights: 551
## initial value 19236.013921
## iter 10 value 2405.694491
## iter 20 value 2371.190863
## iter 30 value 2370.893510
## iter 40 value 2369.699125
## iter 50 value 2363.989737
## iter 60 value 2239.107437
## iter 70 value 2176.710443
## iter 80 value 2154.764151
## iter 90 value 2127.929718
## iter 100 value 2106.623475
## final value 2106.623475
## stopped after 100 iterations
## # weights: 771
## initial value 10444.926587
## iter 10 value 2610.619023
## iter 20 value 2406.488112
## iter 30 value 2306.394509
## iter 40 value 2194.077134
## iter 50 value 2133.276296
## iter 60 value 2100.885811
## iter 70 value 2086.672293
## iter 80 value 2071.223737
## iter 90 value 2047.750887
## iter 100 value 1963.747370
## final value 1963.747370
## stopped after 100 iterations
## # weights: 221
## initial value 13119.713477
## iter 10 value 2224.565767
## iter 20 value 1998.240862
## iter 30 value 1866.093889
## iter 40 value 1818.173508
## iter 50 value 1760.605189
## iter 60 value 1738.423692
## iter 70 value 1724.398512
## iter 80 value 1723.087300
## iter 90 value 1721.493916
## iter 100 value 1721.421277
## final value 1721.421277
## stopped after 100 iterations
## # weights: 331
## initial value 7992.706894
## iter 10 value 2360.019469
## iter 20 value 2349.621187
## iter 30 value 2269.221106
## iter 40 value 2137.344640
## iter 50 value 2130.920189
## iter 60 value 2117.647840
## iter 70 value 2087.449011
## iter 80 value 2020.705575
## iter 90 value 1818.412589
## iter 100 value 1771.176372
## final value 1771.176372
## stopped after 100 iterations
## # weights: 551
## initial value 7125.057980
## iter 10 value 2396.013277
## iter 20 value 2380.535515
## iter 30 value 2370.696175
## iter 40 value 2310.372072
## iter 50 value 2206.228975
## iter 60 value 2142.194461
## iter 70 value 2105.376359
## iter 80 value 1997.502373
## iter 90 value 1953.273664
## iter 100 value 1843.063768
## final value 1843.063768
## stopped after 100 iterations
## # weights: 771
## initial value 12166.355805
## iter 10 value 2387.546584
## iter 20 value 2384.644058
## iter 30 value 2211.222644
## iter 40 value 2153.546373
## iter 50 value 2084.101200
## iter 60 value 2065.458439
## iter 70 value 1943.573970
## iter 80 value 1850.546217
## iter 90 value 1833.729406
## iter 100 value 1828.183210
## final value 1828.183210
## stopped after 100 iterations
## # weights: 221
## initial value 9453.213503
## iter 10 value 2379.452210
## iter 20 value 2334.491838
## iter 30 value 2180.480275
## iter 40 value 2173.067242
## iter 50 value 2140.523117
## iter 60 value 2113.306541
## iter 70 value 2045.471726
## iter 80 value 1959.855737
## iter 90 value 1842.466994
## iter 100 value 1773.947810
## final value 1773.947810
## stopped after 100 iterations
## # weights: 331
## initial value 5237.533364
## iter 10 value 2380.032987
## iter 20 value 2378.125842
## iter 30 value 2193.991466
## iter 40 value 2183.969294
## iter 50 value 2161.233899
## iter 60 value 2124.089860
## iter 70 value 2099.525225
## iter 80 value 2081.414112
## iter 90 value 2032.971513
## iter 100 value 1986.856475
## final value 1986.856475
## stopped after 100 iterations
## # weights: 551
## initial value 13684.506351
## iter 10 value 2382.787310
## iter 20 value 2378.273282
## iter 30 value 2362.902621
## iter 40 value 2249.679942
## iter 50 value 2180.465502
## iter 60 value 2145.983337
## iter 70 value 2130.022763
## iter 80 value 2125.014374
## iter 90 value 2118.525811
## iter 100 value 2107.609682
## final value 2107.609682
## stopped after 100 iterations
## # weights: 771
## initial value 4137.908039
## iter 10 value 2376.015477
## iter 20 value 2246.337502
## iter 30 value 2128.222490
## iter 40 value 2111.597124
## iter 50 value 2099.149021
## iter 60 value 2089.268973
## iter 70 value 2075.537254
## iter 80 value 2073.994178
## iter 90 value 2069.086719
## iter 100 value 2053.667398
## final value 2053.667398
## stopped after 100 iterations
## # weights: 771
## initial value 20166.737908
## iter 10 value 3567.074234
## iter 20 value 3561.796271
## iter 30 value 3482.815181
## iter 40 value 3247.982898
## iter 50 value 3235.572524
## iter 60 value 3229.839157
## iter 70 value 3179.758606
## iter 80 value 3122.692279
## iter 90 value 3107.187417
## iter 100 value 3098.274268
## final value 3098.274268
## stopped after 100 iterations
# Print the caret training summary (tuning grid results and selected values)
# for the n4 neural network fit.
Adult_TDA_PC_5.50.5_n4_NN1Fit0
## Neural Network
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9514970 0.27512234
## 2 0.5 0.9507186 0.27516556
## 2 0.7 0.9508981 0.24936513
## 3 0.3 0.9485033 0.12560280
## 3 0.5 0.9472454 0.09368955
## 3 0.7 0.9497604 0.21684410
## 5 0.3 0.9507784 0.26568168
## 5 0.5 0.9517963 0.27278170
## 5 0.7 0.9522155 0.27915221
## 7 0.3 0.9495809 0.27591656
## 7 0.5 0.9531736 0.28182707
## 7 0.7 0.9488023 0.27553687
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
# Show the per-fold Accuracy and Kappa stored on the fitted train object.
Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.9522098 0.2598833 Fold2
## 2 0.9541944 0.3027466 Fold1
## 3 0.9531166 0.2828513 Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame) for
# the fold-wise comparison, then print the weights of the final network.
ad_tda_pc_5.50.5_n4_nn1_fit_re <-
  Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n4_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.02 0.22 0.00 0.00 0.00 0.00 0.01 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 -0.01 0.00 0.03
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.17 -0.04 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.07 0.00 0.00 0.00 -0.05 0.00 0.01
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.03 0.00 0.01 0.00 0.00 0.00 0.01 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.01 0.00 0.02 0.00 0.04 -0.04
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.01 0.00 0.01 0.00 0.03
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## -0.01 0.00 0.15 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.02 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## -0.97 -0.08 -0.31 0.22 0.17 0.00 -0.75 0.02
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.38 0.11 -0.03 0.00 -0.06 -0.36 -0.52 -0.14
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.09 0.00 -0.19 0.04 -0.55 0.61 0.00 -0.58
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.45 -0.19 0.00 0.42 -0.50 0.10 0.00 0.15
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## -0.02 -0.57 -0.72 0.10 -0.31 -0.68 0.00 -0.26
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.37 -0.03 -0.39 -0.15 0.37 0.04 0.21 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## -0.68 0.83 -0.28 -0.80 0.15 0.01 0.08 -0.96
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.56 0.09 -0.04 -0.25 0.02 -0.80 0.22 -1.19
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 -0.09 0.12 -0.19 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 -0.01 0.15 0.02 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.14 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.03 0.25 0.00 0.15 0.00 -0.31 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 -0.05 0.00 -0.53 0.20 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 -0.67 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.10 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## -0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 -0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 0.26 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## -0.75 0.00 -1.65 0.00 3.38 -0.01 0.00 -0.68
# Variable-importance plot for the n4 network, limited to 25 features.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_PC_5.50.5_n4_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the n4 network that was fitted on the training data.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels to assess
# model fit/performance on the test data, then print the result.
ad_tda_pc_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7407 2028
## >50K 9 324
##
## Accuracy : 0.7915
## 95% CI : (0.7833, 0.7995)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.984e-14
##
## Kappa : 0.1932
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9988
## Specificity : 0.1378
## Pos Pred Value : 0.7851
## Neg Pred Value : 0.9730
## Prevalence : 0.7592
## Detection Rate : 0.7583
## Detection Prevalence : 0.9659
## Balanced Accuracy : 0.5683
##
## 'Positive' Class : <=50K
##
# Print the same confusion matrix object a second time (nothing is recomputed).
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7407 2028
## >50K 9 324
##
## Accuracy : 0.7915
## 95% CI : (0.7833, 0.7995)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.984e-14
##
## Kappa : 0.1932
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9988
## Specificity : 0.1378
## Pos Pred Value : 0.7851
## Neg Pred Value : 0.9730
## Prevalence : 0.7592
## Detection Rate : 0.7583
## Detection Prevalence : 0.9659
## Balanced Accuracy : 0.5683
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics: accuracy, kappa, accuracy bounds and p-values.
ad_tda_pc_5.50.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.914619e-01 1.931511e-01 7.832678e-01 7.994819e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.983630e-14 0.000000e+00
# Store the overall test accuracy (a named length-one vector) for the
# test-set comparison further down.
ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n4_nn1_cf0$overall["Accuracy"]
# Print the per-class statistics of the confusion matrix.
ad_tda_pc_5.50.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9987864 0.1377551 0.7850556
## Neg Pred Value Precision Recall
## 0.9729730 0.7850556 0.9987864
## F1 Prevalence Detection Rate
## 0.8791170 0.7592138 0.7582924
## Detection Prevalence Balanced Accuracy
## 0.9659091 0.5682708
# Keep precision, recall and F1 (elements 5 to 7 of the per-class statistics).
ad_tda_pc_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
# NOTE(review): the objects compared below are the nn1 (neural network) fits, not random forests.
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re minus the n4 TDA-assisted resamples.
# ad_nn1_fit_re is presumably the non-TDA-assisted nn1 per-fold accuracy defined earlier — confirm.
# Negative values mean the TDA-assisted fit had the higher fold accuracy.
diff_tda_pca_5.50.5_nn1_n4_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n4_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n4_3_fold
## Accuracy
## 1 -0.1235707
## 2 -0.1079393
## 3 -0.1461937
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise accuracy differences.
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (same interval as
# the rope plot below) — confirm against the BayesianSignTest definition.
bst_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test.
# When probRight is 0 and probLeft is positive, this ratio evaluates to Inf.
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n4_3_fold[["probRight"]]
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise accuracy differences, called with
# the same -0.01 / 0.01 pair as the sign test.
bsr_tda_pca_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9912667
##
## $winRope
## [1] 0.008733333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise accuracy differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation; with 3-fold resampling a value of 1/3 is the usual heuristic —
# confirm against the correlatedBayesianTtest definition.
bct_tda_pca_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.9939905
##
## $rope
## [1] 0.001617117
##
## $right
## [1] 0.004392391
# Rope Plot
# ROPE summary of the fold-wise differences over the interval [-0.01, 0.01].
plot(rope(diff_tda_pca_5.50.5_nn1_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
#bf_tda_pca_5.50.5_nn1.n4_3_fold
#t_test
# One-sample t-test of the fold-wise differences with t.test() defaults.
t.test(x = as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
## t = -11.338, df = 2, p-value = 0.00769
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.17367967 -0.07812279
## sample estimates:
## mean of x
## -0.1259012
### Test set diff
# Single test-accuracy difference: nn1_cf_ov_acc minus the n4 TDA-assisted
# test accuracy. A positive value means nn1_cf_ov_acc is the larger one.
diff_tda_pca_5.50.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n4_test
## Accuracy
## 0.01238739
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set accuracy difference, using the
# same -0.01 / 0.01 pair as the fold-wise tests.
bst_tda_pca_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set Bayesian sign test.
bst_tda_pca_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_pca_5.50.5_nn1.n4_test[["probLeft"]] /
  bst_tda_pca_5.50.5_nn1.n4_test[["probRight"]]
bst_tda_pca_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set accuracy difference.
bsr_tda_pca_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4589333
##
## $winRight
## [1] 0.5410667
# Bayesian correlated t-test on the test-set accuracy difference.
# NOTE(review): the input here is a single difference, and the printed
# left/rope/right results are all NA, so this test carries no information
# for the test-set comparison.
bct_tda_pca_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n4_test)))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n4_test)) #bf_tda_pca_5.50.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))
##Node5
#Neural Network 1
# Tune an nnet classifier for adult_df1 on the n5 data with caret, using the
# fitControl resampling scheme and the nn1Grid tuning grid; the best
# combination is chosen by Accuracy.
Adult_TDA_PC_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  # Spelled TRUE rather than T: T is an ordinary variable that can be
  # reassigned. NOTE(review): `Importance` is forwarded through train()'s
  # `...`; whether the nnet method uses it is not visible here — confirm.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8114.725127
## iter 10 value 955.349176
## iter 20 value 146.535214
## iter 30 value 143.323738
## iter 40 value 127.664320
## iter 50 value 122.090303
## iter 60 value 122.051380
## final value 122.051077
## converged
## # weights: 331
## initial value 6782.900302
## iter 10 value 803.208127
## iter 20 value 333.939333
## iter 30 value 145.915091
## iter 40 value 143.901633
## iter 50 value 142.368765
## iter 60 value 142.232359
## iter 70 value 140.371788
## iter 80 value 132.766661
## iter 90 value 125.434288
## final value 125.432492
## converged
## # weights: 551
## initial value 8386.479716
## iter 10 value 743.156498
## iter 20 value 144.207283
## iter 30 value 140.479720
## iter 40 value 139.431464
## iter 50 value 139.192910
## iter 60 value 139.190149
## final value 139.190134
## converged
## # weights: 771
## initial value 4819.555805
## iter 10 value 200.603002
## iter 20 value 146.212628
## iter 30 value 145.385861
## iter 40 value 144.678457
## iter 50 value 143.970247
## iter 60 value 143.853175
## iter 70 value 142.967678
## iter 80 value 129.755083
## iter 90 value 129.279069
## iter 100 value 127.733859
## final value 127.733859
## stopped after 100 iterations
## # weights: 221
## initial value 10295.661329
## iter 10 value 213.438469
## iter 20 value 148.268561
## iter 30 value 147.367112
## iter 40 value 127.034871
## iter 50 value 125.579759
## iter 60 value 123.756647
## iter 70 value 123.616960
## iter 80 value 123.613790
## final value 123.613779
## converged
## # weights: 331
## initial value 15602.447840
## final value 8979.567482
## converged
## # weights: 551
## initial value 11106.726781
## final value 7495.000763
## converged
## # weights: 771
## initial value 2393.182194
## iter 10 value 142.491150
## iter 20 value 140.265643
## iter 30 value 140.067256
## iter 40 value 133.844875
## iter 50 value 124.776606
## iter 60 value 120.905279
## iter 70 value 120.832997
## iter 80 value 120.826763
## iter 90 value 120.823279
## iter 100 value 120.819807
## final value 120.819807
## stopped after 100 iterations
## # weights: 221
## initial value 10468.441696
## iter 10 value 1247.377788
## iter 20 value 168.174707
## iter 30 value 146.895843
## iter 40 value 146.098308
## iter 50 value 146.093954
## iter 60 value 146.093590
## iter 60 value 146.093589
## iter 60 value 146.093589
## final value 146.093589
## converged
## # weights: 331
## initial value 5508.624612
## iter 10 value 230.074516
## iter 20 value 171.841236
## iter 30 value 136.128824
## iter 40 value 128.867653
## iter 50 value 128.791065
## iter 60 value 128.457196
## iter 70 value 127.576533
## iter 80 value 127.394796
## iter 90 value 127.386014
## final value 127.385918
## converged
## # weights: 551
## initial value 5316.624827
## iter 10 value 156.247147
## iter 20 value 141.740356
## iter 30 value 141.728673
## final value 141.728637
## converged
## # weights: 771
## initial value 1762.810200
## iter 10 value 209.300775
## iter 20 value 191.437920
## iter 30 value 160.002776
## iter 40 value 141.128098
## iter 50 value 136.372898
## iter 60 value 128.243583
## iter 70 value 127.395994
## iter 80 value 126.962541
## iter 90 value 126.691776
## iter 100 value 126.665930
## final value 126.665930
## stopped after 100 iterations
## # weights: 221
## initial value 3594.508096
## iter 10 value 155.875304
## iter 20 value 147.750919
## iter 30 value 147.233474
## final value 147.233451
## converged
## # weights: 331
## initial value 11346.401158
## iter 10 value 1261.155634
## iter 20 value 234.482796
## iter 30 value 157.728370
## iter 40 value 149.114510
## iter 50 value 149.093072
## iter 60 value 147.694964
## iter 70 value 147.238760
## iter 80 value 147.207997
## iter 90 value 137.221416
## iter 100 value 134.855688
## final value 134.855688
## stopped after 100 iterations
## # weights: 551
## initial value 5955.395537
## iter 10 value 1284.530259
## iter 20 value 1108.383585
## iter 30 value 167.299559
## iter 40 value 141.045737
## iter 50 value 132.956208
## iter 60 value 125.785625
## iter 70 value 118.741038
## iter 80 value 112.880101
## iter 90 value 110.337522
## iter 100 value 110.059933
## final value 110.059933
## stopped after 100 iterations
## # weights: 771
## initial value 4078.741639
## iter 10 value 184.352920
## iter 20 value 164.487132
## iter 30 value 141.024537
## iter 40 value 135.898757
## iter 50 value 135.333847
## iter 60 value 135.065944
## iter 70 value 135.012249
## iter 80 value 133.208397
## iter 90 value 132.811225
## iter 100 value 132.542635
## final value 132.542635
## stopped after 100 iterations
## # weights: 221
## initial value 7277.681072
## final value 5289.067554
## converged
## # weights: 331
## initial value 2375.764083
## iter 10 value 152.299399
## iter 20 value 139.283081
## iter 30 value 138.535381
## iter 40 value 138.452331
## iter 50 value 137.794557
## iter 60 value 137.322953
## iter 70 value 137.316473
## final value 137.316392
## converged
## # weights: 551
## initial value 8107.701183
## iter 10 value 2481.926506
## iter 20 value 2228.135442
## iter 30 value 179.059568
## iter 40 value 149.800349
## iter 50 value 148.249261
## iter 60 value 148.167834
## iter 70 value 148.164986
## iter 80 value 147.397685
## iter 90 value 146.852759
## iter 100 value 138.775744
## final value 138.775744
## stopped after 100 iterations
## # weights: 771
## initial value 3248.406574
## iter 10 value 164.645719
## iter 20 value 154.746074
## iter 30 value 149.569158
## iter 40 value 124.585744
## iter 50 value 122.993783
## iter 60 value 122.249542
## iter 70 value 120.929034
## iter 80 value 119.778015
## iter 90 value 119.173984
## iter 100 value 118.981551
## final value 118.981551
## stopped after 100 iterations
## # weights: 221
## initial value 4831.366019
## iter 10 value 159.649543
## iter 20 value 152.165837
## iter 30 value 152.157330
## iter 40 value 152.155297
## iter 40 value 152.155296
## iter 40 value 152.155296
## final value 152.155296
## converged
## # weights: 331
## initial value 7396.843625
## iter 10 value 2463.167185
## iter 20 value 729.514344
## iter 30 value 152.528177
## iter 40 value 151.392722
## iter 50 value 150.091777
## iter 60 value 150.048227
## iter 70 value 150.039919
## iter 80 value 150.033212
## iter 90 value 150.017878
## iter 100 value 150.016910
## final value 150.016910
## stopped after 100 iterations
## # weights: 551
## initial value 16915.034203
## iter 10 value 752.216866
## iter 20 value 148.773141
## iter 30 value 148.587417
## iter 40 value 143.924784
## iter 50 value 140.169230
## iter 60 value 140.115269
## iter 70 value 139.716756
## iter 80 value 137.682901
## iter 90 value 137.158001
## iter 100 value 135.820296
## final value 135.820296
## stopped after 100 iterations
## # weights: 771
## initial value 7981.096380
## iter 10 value 393.883238
## iter 20 value 385.018614
## iter 30 value 151.684513
## iter 40 value 149.105774
## iter 50 value 148.495400
## iter 60 value 144.066648
## iter 70 value 141.319638
## iter 80 value 140.220873
## iter 90 value 138.690152
## iter 100 value 137.689693
## final value 137.689693
## stopped after 100 iterations
## # weights: 221
## initial value 11031.729530
## iter 10 value 1196.093223
## iter 20 value 164.338322
## iter 30 value 142.999081
## iter 40 value 132.856462
## iter 50 value 127.788561
## iter 60 value 127.627346
## final value 127.625925
## converged
## # weights: 331
## initial value 16118.653862
## iter 10 value 1902.207115
## iter 20 value 1726.198084
## iter 30 value 246.984968
## iter 40 value 150.851762
## iter 50 value 140.998849
## iter 60 value 139.281696
## final value 138.674114
## converged
## # weights: 551
## initial value 9114.665553
## iter 10 value 2264.368277
## iter 20 value 179.679897
## iter 30 value 144.885412
## iter 40 value 139.021383
## iter 50 value 130.078052
## iter 60 value 129.906488
## iter 70 value 129.901615
## iter 80 value 129.113833
## iter 90 value 128.537884
## iter 100 value 128.527035
## final value 128.527035
## stopped after 100 iterations
## # weights: 771
## initial value 14718.897146
## iter 10 value 3575.512617
## iter 20 value 175.844489
## iter 30 value 140.132871
## iter 40 value 139.780904
## iter 50 value 134.525000
## iter 60 value 128.386603
## iter 70 value 123.424979
## iter 80 value 122.106268
## iter 90 value 121.464634
## iter 100 value 121.442818
## final value 121.442818
## stopped after 100 iterations
## # weights: 221
## initial value 7210.044833
## final value 5085.771188
## converged
## # weights: 331
## initial value 7840.623688
## final value 5267.270212
## converged
## # weights: 551
## initial value 9766.210830
## final value 5801.465928
## converged
## # weights: 771
## initial value 3726.600694
## iter 10 value 156.985537
## iter 20 value 145.841976
## iter 30 value 137.877349
## iter 40 value 134.690931
## iter 50 value 132.271504
## iter 60 value 131.970327
## iter 70 value 131.737639
## iter 80 value 131.597859
## iter 90 value 131.406544
## iter 100 value 131.226594
## final value 131.226594
## stopped after 100 iterations
## # weights: 221
## initial value 8557.747268
## iter 10 value 1115.936081
## iter 20 value 171.405250
## iter 30 value 150.476714
## iter 40 value 150.366741
## iter 50 value 146.091929
## final value 146.091313
## converged
## # weights: 331
## initial value 7840.229346
## iter 10 value 3125.471990
## iter 20 value 165.025097
## iter 30 value 149.651050
## iter 40 value 140.373153
## iter 50 value 133.566021
## iter 60 value 131.890493
## iter 70 value 129.795329
## iter 80 value 128.192149
## iter 90 value 127.986255
## iter 100 value 127.977585
## final value 127.977585
## stopped after 100 iterations
## # weights: 551
## initial value 2820.838170
## iter 10 value 183.534617
## iter 20 value 158.604882
## iter 30 value 151.101081
## iter 40 value 134.003396
## iter 50 value 133.852541
## iter 60 value 133.838889
## iter 70 value 133.838061
## final value 133.838052
## converged
## # weights: 771
## initial value 3323.601046
## iter 10 value 143.718280
## iter 20 value 141.750196
## iter 30 value 141.727870
## iter 40 value 141.713513
## iter 50 value 141.673753
## iter 60 value 141.514376
## iter 70 value 141.210279
## iter 80 value 137.529991
## iter 90 value 131.693695
## iter 100 value 129.787842
## final value 129.787842
## stopped after 100 iterations
## # weights: 221
## initial value 16601.226835
## iter 10 value 235.554930
## iter 20 value 222.178009
## iter 30 value 218.092472
## final value 217.850120
## converged
Adult_TDA_PC_5.50.5_n5_NN1Fit0
## Neural Network
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9603, 9603, 9602
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9979867 0
## 2 0.5 0.9979867 0
## 2 0.7 0.9979867 0
## 3 0.3 0.9979867 0
## 3 0.5 0.9979867 0
## 3 0.7 0.9979867 0
## 5 0.3 0.9979867 0
## 5 0.5 0.9979867 0
## 5 0.7 0.9979867 0
## 7 0.3 0.9979867 0
## 7 0.5 0.9979867 0
## 7 0.7 0.9979867 0
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.9979175 0 Fold3
## 2 0.9981254 0 Fold2
## 3 0.9979171 0 Fold1
# Retain the per-fold cross-validation accuracy as a one-column data frame;
# it is subtracted from the baseline network's fold accuracies further down.
ad_tda_pc_5.50.5_n5_nn1_fit_re <- Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample["Accuracy"]
# Print the architecture, fitting options and weights of the selected network.
summary(Adult_TDA_PC_5.50.5_n5_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o
## -2.04 -2.04 -2.04
# Variable-importance plot for the node-5 PCA-filter network, limited to the
# 25 highest-ranked predictors. The title typo ("Assited") is corrected.
vip(Adult_TDA_PC_5.50.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_NN1Fit TDA-Assisted NN")

# Score the held-out test rows with the network trained on the node-5 subset.
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_NN1Fit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted vs. observed income class on the test set.
# NOTE(review): in the rendered run every test row is predicted as ' <=50K'
# (Kappa = 0, accuracy equal to the no-information rate) -- verify that the
# node-5 training subset contains enough ' >50K' cases to learn from.
ad_tda_pc_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall test-set accuracy (first entry of `$overall`) by name.
ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc <- ad_tda_pc_5.50.5_n5_nn1_cf0$overall["Accuracy"]
# Show the per-class statistics for the positive class.
ad_tda_pc_5.50.5_n5_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of `$byClass`) by name.
ad_tda_pc_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy of the baseline network minus that of the TDA-assisted
# network; negative values mean the TDA-assisted model scored higher.
# NOTE(review): rows are paired by position, not by fold label -- confirm that
# both `$resample` tables list their folds in the same order.
diff_tda_pca_5.50.5_nn1_n5_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.50.5_n5_nn1_fit_re
diff_tda_pca_5.50.5_nn1_n5_3_fold
## Accuracy
## 1 -0.1692784
## 2 -0.1518703
## 3 -0.1909942
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are passed as the bounds of the
# equivalence region (reported as `probRope` in the result).
bst_tda_pca_5.50.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" vs. "right" from the Bayesian sign test. The ratio is Inf
# whenever probRight is exactly 0, as in the rendered run.
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.50.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9905333
##
## $winRope
## [1] 0.009466667
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument is 0.1. If it is the cross-validation
# correlation rho (1 / number of folds in Corani & Benavoli's test), 3-fold CV
# would call for 1/3 -- confirm against the function definition.
bct_tda_pca_5.50.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9967268
##
## $rope
## [1] 0.0006790979
##
## $right
## [1] 0.002594112
# ROPE plot of the fold-wise differences against the [-0.01, 0.01] interval
plot(rope(diff_tda_pca_5.50.5_nn1_n5_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
#bf_tda_pca_5.50.5_nn1.n5_3_fold
# One-sample t-test of the mean fold-wise difference against zero
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold)
## t = -15.085, df = 2, p-value = 0.004366
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2194070 -0.1220216
## sample estimates:
## mean of x
## -0.1707143
### Test set diff
# Test-set accuracy of the baseline network minus that of the TDA-assisted
# network: a single number, positive when the baseline scored higher.
diff_tda_pca_5.50.5_nn1.n5_test <- nn1_cf_ov_acc - ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc
diff_tda_pca_5.50.5_nn1.n5_test
## Accuracy
## 0.04463554
## Bayesian tests on the test-set difference
# Bayesian sign test with the -0.01 / 0.01 equivalence bounds; the input here
# is the single test-set accuracy difference.
bst_tda_pca_5.50.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the test-set Bayesian sign test.
bst_tda_pca_5.50.5_nn1.n5_test_odds.left <- with(
  bst_tda_pca_5.50.5_nn1.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, using the
# -0.01 / 0.01 equivalence bounds.
bsr_tda_pca_5.50.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1599
##
## $winRight
## [1] 0.8401
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): the rendered run returns NA for left/rope/right. Presumably a
# single observation leaves no spread to estimate -- confirm against the
# function definition before reporting this result.
bct_tda_pca_5.50.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nn1.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))
#bf_tda_pca_5.50.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
#Neural Network 1
# Fit the neural network for node 1 of the KDE-filter mapper: train() tunes an
# nnet model over `nn1Grid` under the resampling scheme in `fitControl` and
# selects the candidate with the highest accuracy.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is not a named train() argument, so it is passed
# on to the underlying fitting routine; presumably nnet ignores it -- confirm
# before removing.
# NOTE(review): most fits in the rendered log stop at the 100-iteration cap
# without converging; consider passing a larger `maxit`.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 6149.257507
## iter 10 value 5108.018332
## iter 20 value 4701.587790
## iter 30 value 4666.466015
## iter 40 value 4611.630260
## iter 50 value 4500.538714
## iter 60 value 4468.956661
## iter 70 value 4433.934881
## iter 80 value 4395.204283
## iter 90 value 4381.527680
## iter 100 value 4367.839204
## final value 4367.839204
## stopped after 100 iterations
## # weights: 331
## initial value 5679.128402
## iter 10 value 5069.737704
## iter 20 value 4676.178389
## iter 30 value 4652.939849
## iter 40 value 4646.944859
## iter 50 value 4585.979057
## iter 60 value 4543.743732
## iter 70 value 4525.201564
## iter 80 value 4510.096137
## iter 90 value 4504.194642
## iter 100 value 4162.779379
## final value 4162.779379
## stopped after 100 iterations
## # weights: 551
## initial value 10235.010522
## iter 10 value 5045.386653
## iter 20 value 4986.907411
## iter 30 value 4953.260081
## iter 40 value 4907.668238
## iter 50 value 4905.538685
## iter 60 value 4584.030818
## iter 70 value 4541.912777
## iter 80 value 4512.474436
## iter 90 value 4501.659006
## iter 100 value 4494.602528
## final value 4494.602528
## stopped after 100 iterations
## # weights: 771
## initial value 11318.371368
## iter 10 value 4990.763017
## iter 20 value 4655.244236
## iter 30 value 4516.639038
## iter 40 value 4502.771933
## iter 50 value 4493.240297
## iter 60 value 4491.945598
## iter 70 value 4423.030165
## iter 80 value 4307.274819
## iter 90 value 4144.973196
## iter 100 value 3840.732895
## final value 3840.732895
## stopped after 100 iterations
## # weights: 221
## initial value 5520.959303
## iter 10 value 5108.114505
## iter 20 value 5108.058151
## iter 30 value 5089.052374
## iter 40 value 4771.188428
## iter 50 value 4615.340975
## iter 60 value 4543.117500
## iter 70 value 4486.918050
## iter 80 value 4388.467045
## iter 90 value 3694.932972
## iter 100 value 3200.524854
## final value 3200.524854
## stopped after 100 iterations
## # weights: 331
## initial value 6710.880706
## iter 10 value 4896.023460
## iter 20 value 4647.248076
## iter 30 value 4602.133160
## iter 40 value 4548.314515
## iter 50 value 4523.473325
## iter 60 value 4515.536828
## iter 70 value 4512.185141
## iter 80 value 4511.321099
## iter 90 value 4430.075888
## iter 100 value 4363.506317
## final value 4363.506317
## stopped after 100 iterations
## # weights: 551
## initial value 6562.785894
## iter 10 value 4861.429721
## iter 20 value 4718.997443
## iter 30 value 4608.132469
## iter 40 value 4537.775361
## iter 50 value 4532.561750
## iter 60 value 4525.383242
## iter 70 value 4522.510067
## iter 80 value 4521.112476
## iter 90 value 4499.717359
## iter 100 value 4430.437803
## final value 4430.437803
## stopped after 100 iterations
## # weights: 771
## initial value 5483.256670
## iter 10 value 4916.368015
## iter 20 value 4684.151027
## iter 30 value 4659.355482
## iter 40 value 4649.167432
## iter 50 value 4597.285065
## iter 60 value 4512.612367
## iter 70 value 4508.300699
## iter 80 value 4495.659100
## iter 90 value 4351.693676
## iter 100 value 3920.617188
## final value 3920.617188
## stopped after 100 iterations
## # weights: 221
## initial value 7712.866670
## iter 10 value 5057.826841
## iter 20 value 4819.834152
## iter 30 value 4645.605327
## iter 40 value 4630.552478
## iter 50 value 4626.996150
## iter 50 value 4626.996122
## iter 60 value 4626.354827
## iter 60 value 4626.354814
## iter 60 value 4626.354783
## final value 4626.354783
## converged
## # weights: 331
## initial value 7011.132706
## iter 10 value 4768.359414
## iter 20 value 4645.994592
## iter 30 value 4638.213370
## iter 40 value 4635.561297
## iter 50 value 4631.813245
## iter 60 value 4586.973308
## iter 70 value 4529.990823
## iter 80 value 4187.960908
## iter 90 value 3799.771722
## iter 100 value 3564.169943
## final value 3564.169943
## stopped after 100 iterations
## # weights: 551
## initial value 7591.221582
## iter 10 value 4990.106659
## iter 20 value 4574.258368
## iter 30 value 4527.467275
## iter 40 value 4387.813075
## iter 50 value 4258.403410
## iter 60 value 3991.814086
## iter 70 value 3706.095815
## iter 80 value 3586.852956
## iter 90 value 3034.743639
## iter 100 value 2979.883890
## final value 2979.883890
## stopped after 100 iterations
## # weights: 771
## initial value 5610.548217
## iter 10 value 5106.354204
## iter 20 value 4726.773647
## iter 30 value 4646.696782
## iter 40 value 4629.275668
## iter 50 value 4627.532491
## iter 60 value 4598.061086
## iter 70 value 4517.098927
## iter 80 value 4505.942250
## iter 90 value 4499.452091
## iter 100 value 4429.505364
## final value 4429.505364
## stopped after 100 iterations
## # weights: 221
## initial value 5297.241595
## iter 10 value 5106.248258
## iter 20 value 4955.685843
## iter 30 value 4675.760200
## iter 40 value 4589.422605
## iter 50 value 4513.516405
## iter 60 value 4487.658854
## iter 70 value 4438.309984
## iter 80 value 4394.948858
## iter 90 value 4382.588040
## iter 100 value 4371.694577
## final value 4371.694577
## stopped after 100 iterations
## # weights: 331
## initial value 5183.466604
## iter 10 value 4716.022050
## iter 20 value 4678.910589
## iter 30 value 4677.504934
## iter 40 value 4676.809908
## iter 50 value 4668.693443
## iter 60 value 4652.850609
## iter 70 value 4611.124336
## iter 80 value 4587.011241
## iter 90 value 4567.034426
## iter 100 value 4506.136705
## final value 4506.136705
## stopped after 100 iterations
## # weights: 551
## initial value 5939.145444
## iter 10 value 4772.120073
## iter 20 value 4625.157925
## iter 30 value 4616.410300
## iter 40 value 4596.744362
## iter 50 value 4568.508404
## iter 60 value 4549.664495
## iter 70 value 4543.627994
## iter 80 value 4516.806760
## iter 90 value 4465.065662
## iter 100 value 4444.147983
## final value 4444.147983
## stopped after 100 iterations
## # weights: 771
## initial value 6414.094798
## iter 10 value 4691.277109
## iter 20 value 4650.941967
## iter 30 value 4628.268867
## iter 40 value 4576.166580
## iter 50 value 4486.536580
## iter 60 value 4464.902834
## iter 70 value 4454.541743
## iter 80 value 4449.511660
## iter 90 value 4416.328082
## iter 100 value 4399.814355
## final value 4399.814355
## stopped after 100 iterations
## # weights: 221
## initial value 7356.658578
## iter 10 value 4764.319945
## iter 20 value 4587.245909
## iter 30 value 4393.915676
## iter 40 value 4064.333946
## iter 50 value 3882.195932
## iter 60 value 3511.096413
## iter 70 value 3239.811744
## iter 80 value 3033.924748
## iter 90 value 2996.109066
## iter 100 value 2964.376398
## final value 2964.376398
## stopped after 100 iterations
## # weights: 331
## initial value 9729.059132
## iter 10 value 4799.005109
## iter 20 value 4649.331267
## iter 30 value 4583.841428
## iter 40 value 4543.288877
## iter 50 value 4503.870606
## iter 60 value 4493.471910
## final value 4492.872494
## converged
## # weights: 551
## initial value 4978.033018
## iter 10 value 4854.813149
## iter 20 value 4615.115602
## iter 30 value 4555.157353
## iter 40 value 4517.198474
## iter 50 value 4509.925321
## iter 60 value 4506.732058
## iter 70 value 4482.024381
## iter 80 value 4433.567472
## iter 90 value 4361.271870
## iter 100 value 4037.976597
## final value 4037.976597
## stopped after 100 iterations
## # weights: 771
## initial value 5419.739649
## iter 10 value 4960.641659
## iter 20 value 4616.064063
## iter 30 value 4562.225406
## iter 40 value 4536.947597
## iter 50 value 4406.089299
## iter 60 value 4311.050253
## iter 70 value 4260.917859
## iter 80 value 4070.925039
## iter 90 value 3998.172367
## iter 100 value 3515.750289
## final value 3515.750289
## stopped after 100 iterations
## # weights: 221
## initial value 5458.777873
## iter 10 value 5103.378366
## iter 20 value 5099.680081
## iter 30 value 4711.987731
## iter 40 value 4690.200488
## iter 50 value 4612.666320
## iter 60 value 4551.924312
## iter 70 value 4479.295483
## iter 80 value 4463.728063
## iter 90 value 4442.810002
## iter 100 value 4378.049218
## final value 4378.049218
## stopped after 100 iterations
## # weights: 331
## initial value 7462.260623
## iter 10 value 5113.281040
## iter 20 value 5112.419030
## iter 30 value 4965.240163
## iter 40 value 4604.816908
## iter 50 value 4020.046490
## iter 60 value 3590.729382
## iter 70 value 3303.711063
## iter 80 value 3090.466739
## iter 90 value 2998.550633
## iter 100 value 2899.911776
## final value 2899.911776
## stopped after 100 iterations
## # weights: 551
## initial value 8637.078222
## iter 10 value 4945.286979
## iter 20 value 4564.542715
## iter 30 value 4550.576082
## iter 40 value 4534.179443
## iter 50 value 4520.269648
## iter 60 value 4511.053179
## iter 70 value 4510.498167
## iter 80 value 4507.017650
## iter 90 value 4500.673922
## iter 100 value 4440.085325
## final value 4440.085325
## stopped after 100 iterations
## # weights: 771
## initial value 8162.037620
## iter 10 value 5113.889669
## iter 20 value 4731.761823
## iter 30 value 4660.539972
## iter 40 value 4634.191853
## iter 50 value 4571.634155
## iter 60 value 4527.078527
## iter 70 value 4479.745544
## iter 80 value 4474.089820
## iter 90 value 4467.439895
## iter 100 value 4459.472910
## final value 4459.472910
## stopped after 100 iterations
## # weights: 221
## initial value 4962.370422
## iter 10 value 4598.031315
## iter 20 value 4564.615450
## iter 30 value 4527.833510
## iter 40 value 4484.328666
## iter 50 value 4451.501728
## iter 60 value 4382.199237
## iter 70 value 4308.511211
## iter 80 value 4182.339538
## iter 90 value 3672.833858
## iter 100 value 3216.128558
## final value 3216.128558
## stopped after 100 iterations
## # weights: 331
## initial value 7044.793797
## iter 10 value 4954.378310
## iter 20 value 4792.413478
## iter 30 value 4541.286078
## iter 40 value 4504.711834
## iter 50 value 4489.906554
## iter 60 value 4428.877486
## iter 70 value 4336.139257
## iter 80 value 4173.775202
## iter 90 value 3871.665388
## iter 100 value 3538.236733
## final value 3538.236733
## stopped after 100 iterations
## # weights: 551
## initial value 6602.027091
## iter 10 value 5106.506349
## iter 20 value 5106.482076
## iter 30 value 5083.413279
## iter 40 value 4760.366516
## iter 50 value 4638.425011
## iter 60 value 4623.685584
## iter 70 value 4607.771689
## iter 80 value 4589.913724
## iter 90 value 4417.725996
## iter 100 value 4382.675172
## final value 4382.675172
## stopped after 100 iterations
## # weights: 771
## initial value 5187.250075
## iter 10 value 4819.379437
## iter 20 value 4575.529253
## iter 30 value 4433.642407
## iter 40 value 4414.184861
## iter 50 value 4396.438547
## iter 60 value 4384.625017
## iter 70 value 4382.013948
## iter 80 value 4380.106629
## iter 90 value 4369.364028
## iter 100 value 4359.055680
## final value 4359.055680
## stopped after 100 iterations
## # weights: 221
## initial value 5611.099438
## iter 10 value 4704.032286
## iter 20 value 4610.244721
## iter 30 value 4538.297049
## iter 40 value 4425.365191
## iter 50 value 4394.450964
## iter 60 value 4377.554494
## iter 70 value 4213.687303
## iter 80 value 3995.268979
## iter 90 value 3399.400671
## iter 100 value 3051.919931
## final value 3051.919931
## stopped after 100 iterations
## # weights: 331
## initial value 5534.033510
## iter 10 value 5107.713154
## iter 20 value 5106.659772
## iter 30 value 5106.647626
## iter 40 value 4608.839973
## iter 50 value 4477.289871
## iter 60 value 4457.836667
## iter 70 value 4405.385054
## iter 80 value 4399.868443
## iter 90 value 3777.697981
## iter 100 value 3154.410301
## final value 3154.410301
## stopped after 100 iterations
## # weights: 551
## initial value 5663.189802
## iter 10 value 4982.783056
## iter 20 value 4947.235604
## iter 30 value 4610.671218
## iter 40 value 4521.837737
## iter 50 value 4506.276454
## iter 60 value 4500.294131
## iter 70 value 4482.275642
## iter 80 value 4478.956429
## iter 90 value 4471.335274
## iter 100 value 4469.224863
## final value 4469.224863
## stopped after 100 iterations
## # weights: 771
## initial value 5214.844903
## iter 10 value 4845.890369
## iter 20 value 4580.797245
## iter 30 value 4531.100914
## iter 40 value 4492.796003
## iter 50 value 4443.191170
## iter 60 value 4428.200236
## iter 70 value 4400.691902
## iter 80 value 4386.248316
## iter 90 value 4385.691119
## iter 100 value 3637.756318
## final value 3637.756318
## stopped after 100 iterations
## # weights: 221
## initial value 6354.505278
## iter 10 value 4954.238186
## iter 20 value 4642.413217
## iter 30 value 4623.786662
## iter 40 value 4574.684848
## iter 50 value 4464.538142
## iter 60 value 4451.843182
## iter 70 value 4089.929540
## iter 80 value 3868.273933
## iter 90 value 3455.307673
## iter 100 value 3283.861911
## final value 3283.861911
## stopped after 100 iterations
## # weights: 331
## initial value 5178.842939
## iter 10 value 4982.434083
## iter 20 value 4787.135464
## iter 30 value 4572.976882
## iter 40 value 4413.203965
## iter 50 value 4335.943926
## iter 60 value 4182.315784
## iter 70 value 3235.333952
## iter 80 value 3110.375400
## iter 90 value 3086.575699
## iter 100 value 2871.844382
## final value 2871.844382
## stopped after 100 iterations
## # weights: 551
## initial value 6566.047250
## iter 10 value 5101.035299
## iter 20 value 4663.743917
## iter 30 value 4624.192993
## iter 40 value 4518.139519
## iter 50 value 4449.580502
## iter 60 value 4406.523765
## iter 70 value 4358.334112
## iter 80 value 4328.920748
## iter 90 value 4124.631703
## iter 100 value 4053.940115
## final value 4053.940115
## stopped after 100 iterations
## # weights: 771
## initial value 11415.967320
## iter 10 value 4711.374071
## iter 20 value 4552.640612
## iter 30 value 4508.286388
## iter 40 value 4451.782833
## iter 50 value 4407.997159
## iter 60 value 4384.536901
## iter 70 value 4374.365819
## iter 80 value 4373.127890
## iter 90 value 4365.791917
## iter 100 value 4358.501380
## final value 4358.501380
## stopped after 100 iterations
## # weights: 331
## initial value 8157.908702
## iter 10 value 7660.646412
## iter 20 value 7600.615456
## iter 30 value 6969.887234
## iter 40 value 6964.516545
## iter 50 value 6883.570813
## iter 60 value 6873.163230
## iter 70 value 6861.552904
## iter 80 value 6791.667125
## iter 90 value 6448.674621
## iter 100 value 6014.759468
## final value 6014.759468
## stopped after 100 iterations
Adult_TDA_KDE_5.50.5_n1_NN1Fit0
## Neural Network
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8925, 8925, 8924
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8133244 0.4314430
## 2 0.5 0.8448499 0.5951524
## 2 0.7 0.8092908 0.4022641
## 3 0.3 0.8101874 0.4301478
## 3 0.5 0.8069748 0.4054663
## 3 0.7 0.8476133 0.5887745
## 5 0.3 0.7923358 0.3360933
## 5 0.5 0.7888999 0.2876728
## 5 0.7 0.8154193 0.4151006
## 7 0.3 0.7923356 0.3219716
## 7 0.5 0.8154940 0.4662657
## 7 0.7 0.7992084 0.3512407
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8478602 0.5860868 Fold3
## 2 0.8655312 0.6414787 Fold2
## 3 0.8294487 0.5387579 Fold1
# Retain the per-fold cross-validation accuracy of the KDE node-1 network as a
# one-column data frame.
ad_tda_kde_5.50.5_n1_nn1_fit_re <- Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample["Accuracy"]
# Print the architecture, fitting options and weights of the selected network.
summary(Adult_TDA_KDE_5.50.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 1.00 2.03 -0.41 -0.41 -1.58 0.00 3.62 0.06
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.46 -0.74 0.00 0.00 0.03 1.24 0.03 0.08
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.46 -0.03 -0.58 0.93 -0.14 0.81 -0.94 -0.25
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -1.44 0.00 0.96 -0.16 3.09 -0.82 0.00 2.12
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.04 -1.87 1.51 0.03 -0.41 1.29 0.01 -0.09
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.11 -0.44 0.05 0.99 -0.11 -0.01 -1.15 0.28
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.90 -0.27 -0.16 1.61 -1.46 0.75 -0.31 -0.01
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.42 -0.05 -0.19 0.47 0.05 0.73 -1.04 2.04
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.08 -0.19 0.52 1.02 0.00 -0.12 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.09 0.00 0.00 -0.23 -0.05 0.00 -0.18 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.01 0.00 0.00 0.00 0.00 0.00 -0.16 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.06 0.00 0.63 0.01 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.04 0.00 0.04 0.00 0.43 -0.17
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 -0.42 0.02 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -4.32 0.09 -1.02 -0.10 -1.47 -0.02 0.31 -1.30
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.39 -0.37 0.04 0.00 -1.32 -2.77 -0.46 -0.15
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -0.04 -1.26 -0.38 -0.10 0.88 -0.38 1.89 -1.08
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.07 0.25 1.69 -1.01 0.21 -0.95 -0.15 1.63
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.59 -7.30 1.98 1.07 -1.04 -0.25 -0.30 -0.08
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 1.21 -0.45 -2.47 -1.18 -1.22 -0.53 0.21 0.37
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.04 -0.34 1.71 -2.51 1.36 -0.42 -6.15 -0.40
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 3.81 -0.06 -0.05 -2.54 -0.55 -1.12 -3.58 -0.73
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.18 -1.09 -0.12 -0.14 -0.11 0.28
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.01 0.19 0.04 0.20 0.03 0.02 -0.05 -0.25
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.03 -0.32 -0.06 -0.01 0.05 -0.09 0.60 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.01 0.77 -0.20 -0.08 -0.17 -0.55 0.09 0.05
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## -0.26 -0.75 -0.03 0.04 -0.31 -0.11 0.57 0.58
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## -0.01 0.08 -3.16 -0.08 -0.02
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## -2.22 -1.85 2.90 0.10
# Variable-importance plot limited to 50 features for the node-1 network.
vip(Adult_TDA_KDE_5.50.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_NN1Fit TDA-Assited NN")

# Score the held-out test rows with the node-1 network.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels to obtain the
# test-set confusion matrix and its derived statistics, then display it.
ad_tda_kde_5.50.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7211 1660
## >50K 205 692
##
## Accuracy : 0.8091
## 95% CI : (0.8011, 0.8168)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.338
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9724
## Specificity : 0.2942
## Pos Pred Value : 0.8129
## Neg Pred Value : 0.7715
## Prevalence : 0.7592
## Detection Rate : 0.7382
## Detection Prevalence : 0.9082
## Balanced Accuracy : 0.6333
##
## 'Positive' Class : <=50K
##
# Display the node-1 confusion matrix (repeats the printout directly above).
print(ad_tda_kde_5.50.5_n1_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7211 1660
## >50K 205 692
##
## Accuracy : 0.8091
## 95% CI : (0.8011, 0.8168)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.338
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9724
## Specificity : 0.2942
## Pos Pred Value : 0.8129
## Neg Pred Value : 0.7715
## Prevalence : 0.7592
## Detection Rate : 0.7382
## Detection Prevalence : 0.9082
## Balanced Accuracy : 0.6333
##
## 'Positive' Class : <=50K
##
# Show the overall statistics vector (accuracy, kappa, CI bounds, p-values).
ad_tda_kde_5.50.5_n1_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.090704e-01 3.379551e-01 8.011333e-01 8.168229e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.647199e-32 1.666515e-248
# Store the first overall statistic (test-set Accuracy) for later comparison.
ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc <- ad_tda_kde_5.50.5_n1_nn1_cf0[["overall"]][1]
# Show the per-class statistics (sensitivity, specificity, precision, ...).
ad_tda_kde_5.50.5_n1_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9723571 0.2942177 0.8128734
## Neg Pred Value Precision Recall
## 0.7714604 0.8128734 0.9723571
## F1 Prevalence Detection Rate
## 0.8854915 0.7592138 0.7382269
## Detection Prevalence Balanced Accuracy
## 0.9081695 0.6332874
# Entries 5 to 7 of the per-class statistics are Precision, Recall and F1.
ad_tda_kde_5.50.5_n1_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n1_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re minus the node-1 TDA-assisted
# network. ad_nn1_fit_re is defined earlier in the file (presumably the
# baseline network's fold accuracies - confirm). Negative values mean the
# node-1 model scored higher on that fold.
diff_tda_kde_5.50.5_nn1_n1_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.50.5_n1_nn1_fit_re
print(diff_tda_kde_5.50.5_nn1_n1_3_fold)
## Accuracy
## 1 -0.01922107
## 2 -0.01927605
## 3 -0.02252580
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold differences; -0.01 and 0.01 are presumably
# the lower/upper bounds of the region of practical equivalence (ROPE).
bst_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_nn1.n1_3_fold)
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
# NOTE(review): probRight is 0 for these folds, so the ratio evaluates to Inf.
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n1_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left)
## [1] Inf
# Bayesian Signed Rank Test on the same fold differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_nn1.n1_3_fold)
## $winLeft
## [1] 0.9093333
##
## $winRope
## [1] 0.09066667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold differences. The second argument (0.1)
# is presumably the assumed correlation between folds - confirm against the
# function definition; the last two arguments match the bounds used above.
bct_tda_kde_5.50.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_nn1.n1_3_fold)
## $left
## [1] 0.9927207
##
## $rope
## [1] 0.006417089
##
## $right
## [1] 0.0008621679
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] range.
plot(
  rope(diff_tda_kde_5.50.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
#bf_tda_kde_5.50.5_nn1.n1_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold)
## t = -18.618, df = 2, p-value = 0.002872
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02504175 -0.01564020
## sample estimates:
## mean of x
## -0.02034097
### Test set diff
# Single test-set accuracy difference against the node-1 network.
# NOTE(review): the baseline here is svm_cf_ov_acc, while the fold comparison
# above uses ad_nn1_fit_re; confirm the SVM accuracy is the intended baseline
# for this neural-network comparison.
diff_tda_kde_5.50.5_nn1.n1_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc
print(diff_tda_kde_5.50.5_nn1.n1_test)
## Accuracy
## 0.01873464
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, using the same
# -0.01 / 0.01 bounds as the fold-level tests.
bst_tda_kde_5.50.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_nn1.n1_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
bst_tda_kde_5.50.5_nn1.n1_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n1_test,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_nn1.n1_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_kde_5.50.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_nn1.n1_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4610333
##
## $winRight
## [1] 0.5389667
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): with only one difference value every returned probability is
# NA (presumably because no variance can be estimated), so this result carries
# no information.
bct_tda_kde_5.50.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_nn1.n1_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n1_test)))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))
#bf_tda_kde_5.50.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))
## Node2
# Neural Network 1
# Tune a single-hidden-layer network (caret method "nnet") on the node-2
# training vectors, selecting the candidate from nn1Grid with the highest
# cross-validated Accuracy. fitControl and nn1Grid are defined earlier in the
# file.
# The original call trained this node-2 model on the node-3 data
# (tda.m_kde_adult_5.50.5.n3.vec), the same data the Node3 fit below uses;
# it now uses the node-2 vectors.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitting function; confirm it has any effect for method "nnet".
Adult_TDA_KDE_5.50.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4578.969820
## iter 10 value 4288.745873
## iter 20 value 4246.839192
## iter 30 value 4246.740901
## iter 40 value 4245.403865
## iter 50 value 4224.590291
## iter 60 value 4204.720593
## iter 70 value 4149.236494
## iter 80 value 4049.513720
## iter 90 value 3752.181553
## iter 100 value 3445.782988
## final value 3445.782988
## stopped after 100 iterations
## # weights: 331
## initial value 5693.979874
## iter 10 value 4244.354724
## iter 20 value 4239.151522
## iter 30 value 4230.921248
## iter 40 value 4210.629995
## iter 50 value 4176.703642
## iter 60 value 4160.871568
## iter 70 value 4147.240264
## iter 80 value 3995.244187
## iter 90 value 3899.968491
## iter 100 value 3343.814248
## final value 3343.814248
## stopped after 100 iterations
## # weights: 551
## initial value 7730.487351
## iter 10 value 4330.467747
## iter 20 value 4261.026048
## iter 30 value 3858.771649
## iter 40 value 3221.312967
## iter 50 value 3021.503163
## iter 60 value 2892.038878
## iter 70 value 2862.330802
## iter 80 value 2850.099088
## iter 90 value 2822.997495
## iter 100 value 2808.876162
## final value 2808.876162
## stopped after 100 iterations
## # weights: 771
## initial value 5194.587504
## iter 10 value 4303.955716
## iter 20 value 4165.390389
## iter 30 value 4152.426308
## iter 40 value 4108.177672
## iter 50 value 4002.170100
## iter 60 value 3927.927242
## iter 70 value 3691.644491
## iter 80 value 3025.308673
## iter 90 value 2892.753752
## iter 100 value 2744.469619
## final value 2744.469619
## stopped after 100 iterations
## # weights: 221
## initial value 6642.634879
## iter 10 value 4569.361917
## iter 20 value 4567.449720
## iter 30 value 4513.267336
## iter 40 value 4260.245410
## iter 50 value 4202.089714
## iter 60 value 4176.341618
## iter 70 value 4147.030489
## iter 80 value 4063.681773
## iter 90 value 3398.968548
## iter 100 value 3112.779898
## final value 3112.779898
## stopped after 100 iterations
## # weights: 331
## initial value 5071.575902
## iter 10 value 4498.699933
## iter 20 value 4453.523795
## iter 30 value 4271.223046
## iter 40 value 3750.398538
## iter 50 value 3544.801832
## iter 60 value 3303.055411
## iter 70 value 3078.538640
## iter 80 value 2951.693617
## iter 90 value 2949.196222
## iter 100 value 2947.821673
## final value 2947.821673
## stopped after 100 iterations
## # weights: 551
## initial value 5484.606512
## iter 10 value 4524.432108
## iter 20 value 4262.848689
## iter 30 value 4252.632792
## iter 40 value 4243.994257
## iter 50 value 4228.612963
## iter 60 value 4208.702816
## iter 70 value 4167.429927
## iter 80 value 4065.921635
## iter 90 value 3710.784836
## iter 100 value 3612.164467
## final value 3612.164467
## stopped after 100 iterations
## # weights: 771
## initial value 5369.184103
## iter 10 value 4580.164420
## iter 20 value 4278.922611
## iter 30 value 4254.623825
## iter 40 value 4250.830269
## iter 50 value 4234.386426
## iter 60 value 4201.921867
## iter 70 value 4174.749047
## iter 80 value 4139.780881
## iter 90 value 4001.149136
## iter 100 value 3883.690978
## final value 3883.690978
## stopped after 100 iterations
## # weights: 221
## initial value 6726.926196
## iter 10 value 4569.034572
## iter 20 value 4486.922305
## iter 30 value 4269.166351
## iter 40 value 4257.231413
## iter 50 value 4219.891635
## iter 60 value 4177.629928
## iter 70 value 4108.266162
## iter 80 value 4069.752107
## iter 90 value 3916.830866
## iter 100 value 3355.974813
## final value 3355.974813
## stopped after 100 iterations
## # weights: 331
## initial value 4606.624442
## iter 10 value 4314.641938
## iter 20 value 4260.152681
## iter 30 value 4255.360495
## iter 40 value 4245.081141
## iter 50 value 4237.312191
## iter 60 value 4221.223967
## iter 70 value 3904.060135
## iter 80 value 3673.734791
## iter 90 value 3411.661027
## iter 100 value 3289.864117
## final value 3289.864117
## stopped after 100 iterations
## # weights: 551
## initial value 4810.118216
## iter 10 value 4571.243437
## iter 20 value 4568.141118
## iter 30 value 4524.480298
## iter 40 value 4262.269239
## iter 50 value 4229.025403
## iter 60 value 4212.563941
## iter 70 value 4124.885284
## iter 80 value 4057.162035
## iter 90 value 4050.114488
## iter 100 value 4048.964096
## final value 4048.964096
## stopped after 100 iterations
## # weights: 771
## initial value 5966.575402
## iter 10 value 4546.156559
## iter 20 value 4240.230422
## iter 30 value 4219.397148
## iter 40 value 4176.639598
## iter 50 value 4024.789490
## iter 60 value 3629.352093
## iter 70 value 3281.564644
## iter 80 value 3258.818230
## iter 90 value 3146.695987
## iter 100 value 2920.608678
## final value 2920.608678
## stopped after 100 iterations
## # weights: 221
## initial value 6082.897143
## iter 10 value 4569.484210
## iter 20 value 4371.758210
## iter 30 value 4287.670824
## iter 40 value 4280.231771
## iter 50 value 4278.572182
## iter 60 value 4278.356423
## iter 70 value 4257.569700
## iter 80 value 4182.858220
## iter 90 value 3971.222966
## iter 100 value 3688.484559
## final value 3688.484559
## stopped after 100 iterations
## # weights: 331
## initial value 4589.079698
## iter 10 value 4569.919276
## iter 20 value 4569.808690
## iter 30 value 4536.658753
## iter 40 value 4536.521322
## iter 50 value 4503.760483
## iter 60 value 4297.566640
## iter 70 value 4233.253911
## iter 80 value 4006.357386
## iter 90 value 3405.208520
## iter 100 value 3039.266450
## final value 3039.266450
## stopped after 100 iterations
## # weights: 551
## initial value 4665.120895
## iter 10 value 4293.691102
## iter 20 value 4249.108035
## iter 30 value 4220.877492
## iter 40 value 4201.536955
## iter 50 value 4139.504145
## iter 60 value 4131.965089
## iter 70 value 4092.367431
## iter 80 value 3935.228846
## iter 90 value 3910.042256
## iter 100 value 3871.085811
## final value 3871.085811
## stopped after 100 iterations
## # weights: 771
## initial value 6191.154391
## iter 10 value 4441.373522
## iter 20 value 4309.703771
## iter 30 value 4279.012777
## iter 40 value 4278.798521
## iter 50 value 4278.783768
## final value 4278.783634
## converged
## # weights: 221
## initial value 4697.672237
## iter 10 value 4405.323486
## iter 20 value 4242.115938
## iter 30 value 4180.131465
## iter 40 value 4097.057325
## iter 50 value 3686.496421
## iter 60 value 3259.979906
## iter 70 value 3083.796546
## iter 80 value 2842.408305
## iter 90 value 2729.397854
## iter 100 value 2663.398083
## final value 2663.398083
## stopped after 100 iterations
## # weights: 331
## initial value 4853.475826
## iter 10 value 4538.527989
## iter 20 value 4391.126924
## iter 30 value 4263.862816
## iter 40 value 4234.730370
## iter 50 value 4226.156683
## iter 60 value 4220.882356
## iter 70 value 4218.434003
## iter 80 value 4215.463257
## iter 90 value 4105.951487
## iter 100 value 3683.708103
## final value 3683.708103
## stopped after 100 iterations
## # weights: 551
## initial value 5043.045786
## iter 10 value 4541.415021
## iter 20 value 4303.667664
## iter 30 value 4301.311765
## iter 40 value 4282.372260
## iter 50 value 4281.318458
## iter 60 value 4281.132176
## iter 70 value 4280.828847
## iter 70 value 4280.828823
## iter 80 value 4241.904541
## iter 90 value 4210.155658
## iter 100 value 4188.615895
## final value 4188.615895
## stopped after 100 iterations
## # weights: 771
## initial value 5613.882128
## iter 10 value 4565.476416
## iter 20 value 4290.525555
## iter 30 value 4277.409564
## iter 40 value 4276.083084
## iter 50 value 4275.418387
## iter 60 value 4274.370834
## iter 70 value 4274.331841
## final value 4274.331766
## converged
## # weights: 221
## initial value 5096.417925
## iter 10 value 4468.472505
## iter 20 value 4288.417144
## iter 30 value 4160.943101
## iter 40 value 4110.328589
## iter 50 value 3878.107189
## iter 60 value 3524.252581
## iter 70 value 3314.222921
## iter 80 value 3210.598780
## iter 90 value 3116.486936
## iter 100 value 2876.159683
## final value 2876.159683
## stopped after 100 iterations
## # weights: 331
## initial value 6247.830999
## iter 10 value 4536.485099
## iter 20 value 4298.207520
## iter 30 value 4297.679990
## iter 40 value 4297.168194
## iter 50 value 4285.037129
## iter 60 value 4272.829696
## iter 70 value 4268.000758
## iter 80 value 4209.725975
## iter 90 value 3938.353915
## iter 100 value 3757.476560
## final value 3757.476560
## stopped after 100 iterations
## # weights: 551
## initial value 4635.780563
## iter 10 value 4564.240470
## iter 20 value 4529.898477
## iter 30 value 4332.144270
## iter 40 value 4283.658219
## iter 50 value 4278.492498
## iter 60 value 4277.923064
## iter 70 value 4277.643637
## final value 4277.640515
## converged
## # weights: 771
## initial value 4675.268495
## iter 10 value 4497.612370
## iter 20 value 4303.203308
## iter 30 value 4290.362127
## iter 40 value 4264.846801
## iter 50 value 4241.062487
## iter 60 value 3925.587195
## iter 70 value 3592.868641
## iter 80 value 3571.832761
## iter 90 value 3529.808652
## iter 100 value 3261.509326
## final value 3261.509326
## stopped after 100 iterations
## # weights: 221
## initial value 5152.732780
## iter 10 value 4570.130857
## iter 10 value 4570.130856
## iter 10 value 4570.130849
## final value 4570.130849
## converged
## # weights: 331
## initial value 6274.913787
## final value 4570.084030
## converged
## # weights: 551
## initial value 4971.650025
## iter 10 value 4451.229253
## iter 20 value 4190.231978
## iter 30 value 4158.482257
## iter 40 value 4140.352513
## iter 50 value 4139.252740
## iter 60 value 4125.705688
## iter 70 value 4109.670621
## iter 80 value 4088.026722
## iter 90 value 4029.419780
## iter 100 value 3995.822357
## final value 3995.822357
## stopped after 100 iterations
## # weights: 771
## initial value 9656.387151
## iter 10 value 4286.846644
## iter 20 value 4252.005534
## iter 30 value 4146.617978
## iter 40 value 4141.978400
## iter 50 value 4138.751127
## iter 60 value 4125.463976
## iter 70 value 4111.886708
## iter 80 value 4052.442984
## iter 90 value 3933.545079
## iter 100 value 3560.486266
## final value 3560.486266
## stopped after 100 iterations
## # weights: 221
## initial value 4993.047106
## iter 10 value 4570.408984
## iter 20 value 4567.654456
## iter 30 value 4407.216605
## iter 40 value 4179.709063
## iter 50 value 4167.702600
## iter 60 value 4149.429273
## iter 70 value 4108.165439
## iter 80 value 4084.614907
## iter 90 value 3413.546424
## iter 100 value 3119.924301
## final value 3119.924301
## stopped after 100 iterations
## # weights: 331
## initial value 5126.899665
## iter 10 value 4546.643758
## iter 20 value 4545.789459
## iter 30 value 4507.006901
## iter 40 value 4143.564367
## iter 50 value 4121.277516
## iter 60 value 4039.227410
## iter 70 value 3796.556011
## iter 80 value 3584.281280
## iter 90 value 3166.228602
## iter 100 value 2865.244243
## final value 2865.244243
## stopped after 100 iterations
## # weights: 551
## initial value 7419.994364
## iter 10 value 4453.906999
## iter 20 value 4338.163136
## iter 30 value 4257.250308
## iter 40 value 4242.054903
## iter 50 value 4241.261720
## iter 60 value 4241.069665
## iter 70 value 4152.516231
## iter 80 value 4147.486033
## iter 90 value 4145.995788
## iter 100 value 4144.094760
## final value 4144.094760
## stopped after 100 iterations
## # weights: 771
## initial value 7805.544610
## iter 10 value 4547.882462
## iter 20 value 4475.555813
## iter 30 value 4274.740854
## iter 40 value 4142.175777
## iter 50 value 4087.038583
## iter 60 value 4080.139801
## iter 70 value 4075.407442
## iter 80 value 4054.396810
## iter 90 value 4043.486550
## iter 100 value 4040.746718
## final value 4040.746718
## stopped after 100 iterations
## # weights: 221
## initial value 5459.775388
## iter 10 value 4570.550124
## iter 20 value 4570.319021
## iter 30 value 4570.307348
## iter 40 value 4445.519097
## iter 50 value 4384.377286
## iter 60 value 4206.396405
## iter 70 value 4103.547689
## iter 80 value 4081.664167
## iter 90 value 4037.118126
## iter 100 value 3991.589082
## final value 3991.589082
## stopped after 100 iterations
## # weights: 331
## initial value 6199.265466
## iter 10 value 4563.943123
## iter 20 value 4263.791689
## iter 30 value 4167.996107
## iter 40 value 4098.819500
## iter 50 value 3975.231576
## iter 60 value 3832.071892
## iter 70 value 3686.391575
## iter 80 value 3248.767406
## iter 90 value 2877.960767
## iter 100 value 2807.073224
## final value 2807.073224
## stopped after 100 iterations
## # weights: 551
## initial value 5668.685641
## iter 10 value 4537.158070
## iter 20 value 4235.837195
## iter 30 value 4194.473294
## iter 40 value 4176.497576
## iter 50 value 4166.207669
## iter 60 value 4015.595980
## iter 70 value 3974.754892
## iter 80 value 3911.206452
## iter 90 value 3673.773324
## iter 100 value 3440.583455
## final value 3440.583455
## stopped after 100 iterations
## # weights: 771
## initial value 5456.883949
## iter 10 value 4535.423263
## iter 20 value 4242.428661
## iter 30 value 4216.193519
## iter 40 value 4190.592443
## iter 50 value 4106.558024
## iter 60 value 3826.840487
## iter 70 value 3507.915255
## iter 80 value 3180.285582
## iter 90 value 2834.396079
## iter 100 value 2784.500157
## final value 2784.500157
## stopped after 100 iterations
## # weights: 221
## initial value 7820.408026
## iter 10 value 6832.288398
## iter 20 value 6377.739534
## iter 30 value 6300.272687
## iter 40 value 6249.591558
## iter 50 value 6226.277227
## iter 60 value 6218.013569
## iter 70 value 6181.849265
## iter 80 value 5978.891629
## iter 90 value 5129.792405
## iter 100 value 4697.767364
## final value 4697.767364
## stopped after 100 iterations
# Show the tuning summary: resampled Accuracy/Kappa per size/decay candidate
# and the selected final values.
print(Adult_TDA_KDE_5.50.5_n2_NN1Fit0)
## Neural Network
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7755, 7756, 7757
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7623282 0.2228695
## 2 0.5 0.8059961 0.4734469
## 2 0.7 0.7975765 0.3878566
## 3 0.3 0.7688631 0.2673628
## 3 0.5 0.8058269 0.4886870
## 3 0.7 0.7918216 0.3770158
## 5 0.3 0.7922412 0.3856859
## 5 0.5 0.7647408 0.2833271
## 5 0.7 0.7640527 0.2272761
## 7 0.3 0.7896629 0.3666727
## 7 0.5 0.7669757 0.2608337
## 7 0.7 0.8005848 0.5223055
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
# Per-fold Accuracy and Kappa of the selected model.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7717307 0.2794060 Fold3
## 2 0.8336772 0.5864984 Fold2
## 3 0.8125806 0.5544361 Fold1
# Keep the first column of the per-fold resampling table (the fold accuracies)
# as a one-column data frame; it is used for the paired fold comparison below.
ad_tda_kde_5.50.5_n2_nn1_fit_re <- Adult_TDA_KDE_5.50.5_n2_NN1Fit0[["resample"]][1]
# Print the architecture and fitted weights of the selected network.
summary(Adult_TDA_KDE_5.50.5_n2_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.13 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.03 0.00 0.01 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 3.27 -0.13 1.10 -0.52 -0.41 0.00 -0.51 -0.65
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 2.53 1.79 -0.05 0.00 0.00 -0.02 -0.56 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 2.02 2.27 -4.56 0.00 4.74
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -5.36 0.00 0.00 4.75 0.36 1.57 -0.13 -4.41
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.81 2.57 2.57 0.30 1.09 -0.54 0.02 0.40
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -3.02 4.04 0.93 1.76 4.11 0.06 -2.22 -3.48
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -1.11 0.15 1.09 -1.11 2.01 2.13 2.55 2.81
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -5.12 1.99 -0.76 0.40 1.03 0.61 2.78 0.50
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.25 -0.23 -2.47 0.93 0.73
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.47 0.13 0.41 -1.01 -0.65 -0.16 -1.26 1.92
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## -0.40 0.44 0.00 0.07 0.07 0.67 -0.20 0.15
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.10 -0.11 0.54 0.50 -0.11 -0.01 0.13 -0.17
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.07 -0.90 0.03 0.27 0.97 0.21 1.14 0.40
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.29 0.51 -0.37 0.95 0.11
## b->o h1->o h2->o
## 0.89 -0.06 -3.92
# Variable-importance plot limited to 50 features for the node-2 network.
vip(Adult_TDA_KDE_5.50.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_NN1Fit TDA-Assited NN")

# Score the held-out test rows with the node-2 network.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels to obtain the
# test-set confusion matrix and its derived statistics, then display it.
ad_tda_kde_5.50.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6006 569
## >50K 1410 1783
##
## Accuracy : 0.7974
## 95% CI : (0.7893, 0.8053)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5062
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.8099
## Specificity : 0.7581
## Pos Pred Value : 0.9135
## Neg Pred Value : 0.5584
## Prevalence : 0.7592
## Detection Rate : 0.6149
## Detection Prevalence : 0.6731
## Balanced Accuracy : 0.7840
##
## 'Positive' Class : <=50K
##
# Display the node-2 confusion matrix (repeats the printout directly above).
print(ad_tda_kde_5.50.5_n2_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6006 569
## >50K 1410 1783
##
## Accuracy : 0.7974
## 95% CI : (0.7893, 0.8053)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5062
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.8099
## Specificity : 0.7581
## Pos Pred Value : 0.9135
## Neg Pred Value : 0.5584
## Prevalence : 0.7592
## Detection Rate : 0.6149
## Detection Prevalence : 0.6731
## Balanced Accuracy : 0.7840
##
## 'Positive' Class : <=50K
##
# Show the overall statistics vector (accuracy, kappa, CI bounds, p-values).
ad_tda_kde_5.50.5_n2_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.973997e-01 5.061561e-01 7.892895e-01 8.053321e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.281964e-19 1.592910e-79
# Store the first overall statistic (test-set Accuracy) for later comparison.
ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc <- ad_tda_kde_5.50.5_n2_nn1_cf0[["overall"]][1]
# Show the per-class statistics (sensitivity, specificity, precision, ...).
ad_tda_kde_5.50.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.8098706 0.7580782 0.9134601
## Neg Pred Value Precision Recall
## 0.5584090 0.9134601 0.8098706
## F1 Prevalence Detection Rate
## 0.8585519 0.7592138 0.6148649
## Detection Prevalence Balanced Accuracy
## 0.6731163 0.7839744
# Entries 5 to 7 of the per-class statistics are Precision, Recall and F1.
ad_tda_kde_5.50.5_n2_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n2_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re minus the node-2 TDA-assisted
# network. ad_nn1_fit_re is defined earlier in the file (presumably the
# baseline network's fold accuracies - confirm). Positive values mean the
# node-2 model scored lower on that fold.
diff_tda_kde_5.50.5_nn1_n2_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.50.5_n2_nn1_fit_re
print(diff_tda_kde_5.50.5_nn1_n2_3_fold)
## Accuracy
## 1 0.056908396
## 2 0.012577948
## 3 -0.005657688
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold differences; -0.01 and 0.01 are presumably
# the lower/upper bounds of the region of practical equivalence (ROPE).
bst_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_nn1.n2_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n2_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the same fold differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.50.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_nn1.n2_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3928667
##
## $winRight
## [1] 0.6071333
# Bayesian Correlated Test on the fold differences. The second argument (0.1)
# is presumably the assumed correlation between folds - confirm against the
# function definition; the last two arguments match the bounds used above.
bct_tda_kde_5.50.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_nn1.n2_3_fold)
## $left
## [1] 0.1410987
##
## $rope
## [1] 0.1846968
##
## $right
## [1] 0.6742046
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] range.
plot(
  rope(diff_tda_kde_5.50.5_nn1_n2_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
#bf_tda_kde_5.50.5_nn1.n2_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold)
## t = 1.1453, df = 2, p-value = 0.3707
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.05865643 0.10120887
## sample estimates:
## mean of x
## 0.02127622
### Test set diff
# Single test-set accuracy difference against the node-2 network.
# NOTE(review): the baseline here is svm_cf_ov_acc, while the fold comparison
# above uses ad_nn1_fit_re; confirm the SVM accuracy is the intended baseline
# for this neural-network comparison.
diff_tda_kde_5.50.5_nn1.n2_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc
print(diff_tda_kde_5.50.5_nn1.n2_test)
## Accuracy
## 0.03040541
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, using the same
# -0.01 / 0.01 bounds as the fold-level tests.
bst_tda_kde_5.50.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.50.5_nn1.n2_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of the "left" to the "right" probability.
bst_tda_kde_5.50.5_nn1.n2_test_odds.left <- with(
  bst_tda_kde_5.50.5_nn1.n2_test,
  probLeft / probRight
)
print(bst_tda_kde_5.50.5_nn1.n2_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_kde_5.50.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.50.5_nn1.n2_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1570333
##
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): with only one difference value every returned probability is
# NA (presumably because no variance can be estimated), so this result carries
# no information.
bct_tda_kde_5.50.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.50.5_nn1.n2_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n2_test))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n2_test)) #bf_tda_pca_5.50.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))
## Node 3
# Neural network 1: fit an 'nnet' model through train(), with adult_df1
# (converted to a factor) as the response and every other column of the
# node-3 data as a predictor. Resampling and the tuning grid come from the
# shared `fitControl` and `nn1Grid` objects; the model is selected on accuracy.
# NOTE(review): `Importance` looks like a leftover from a randomForest-style
# call; it is presumably forwarded through `...` and ignored by nnet --
# confirm it can be dropped.
# NOTE(review): most fits in the trace stop after 100 iterations without
# converging; consider raising `maxit` -- confirm.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 7412.470507
## iter 10 value 4267.230875
## iter 20 value 4205.761387
## iter 30 value 4142.553586
## iter 40 value 4115.473166
## iter 50 value 4091.246873
## iter 60 value 4002.668375
## iter 70 value 3948.169741
## iter 80 value 3891.061318
## iter 90 value 3646.555355
## iter 100 value 3412.246403
## final value 3412.246403
## stopped after 100 iterations
## # weights: 331
## initial value 7114.635684
## iter 10 value 4546.404903
## iter 20 value 4203.853701
## iter 30 value 4190.730409
## iter 40 value 4180.757384
## iter 50 value 4126.092904
## iter 60 value 4097.499505
## iter 70 value 4071.408349
## iter 80 value 3914.444253
## iter 90 value 3477.687500
## iter 100 value 3016.829544
## final value 3016.829544
## stopped after 100 iterations
## # weights: 551
## initial value 5147.905100
## iter 10 value 4535.783417
## iter 20 value 4282.170028
## iter 30 value 4265.334582
## iter 40 value 4261.200400
## iter 50 value 4168.501417
## iter 60 value 4124.081285
## iter 70 value 4082.732765
## iter 80 value 3324.930915
## iter 90 value 3025.548321
## iter 100 value 2907.674829
## final value 2907.674829
## stopped after 100 iterations
## # weights: 771
## initial value 5065.637065
## iter 10 value 4356.515922
## iter 20 value 4268.318364
## iter 30 value 4260.420853
## iter 40 value 4228.983064
## iter 50 value 4191.776436
## iter 60 value 4140.767683
## iter 70 value 4127.365350
## iter 80 value 4114.750277
## iter 90 value 4107.811325
## iter 100 value 4104.769314
## final value 4104.769314
## stopped after 100 iterations
## # weights: 221
## initial value 7466.657174
## iter 10 value 4424.757029
## iter 20 value 4258.191796
## iter 30 value 4174.491671
## iter 40 value 4157.310259
## iter 50 value 4144.276201
## iter 60 value 4125.264156
## iter 70 value 4113.572516
## iter 80 value 4049.925242
## iter 90 value 3965.183168
## iter 100 value 3891.837349
## final value 3891.837349
## stopped after 100 iterations
## # weights: 331
## initial value 5022.553667
## iter 10 value 4572.802469
## iter 20 value 4568.585010
## iter 30 value 4568.536202
## iter 40 value 4313.452083
## iter 50 value 4269.612508
## iter 60 value 4247.396159
## iter 70 value 4184.131636
## iter 80 value 4080.786748
## iter 90 value 4040.272948
## iter 100 value 3622.073090
## final value 3622.073090
## stopped after 100 iterations
## # weights: 551
## initial value 4593.815381
## iter 10 value 4381.213133
## iter 20 value 4333.901083
## iter 30 value 4264.631854
## iter 40 value 4244.398424
## iter 50 value 4198.245838
## iter 60 value 4123.280995
## iter 70 value 3652.090322
## iter 80 value 3115.333235
## iter 90 value 3010.798703
## iter 100 value 2978.120606
## final value 2978.120606
## stopped after 100 iterations
## # weights: 771
## initial value 6120.307493
## iter 10 value 4357.386734
## iter 20 value 4093.057995
## iter 30 value 3631.593605
## iter 40 value 3348.014081
## iter 50 value 3019.158841
## iter 60 value 2985.297972
## iter 70 value 2880.917919
## iter 80 value 2815.336954
## iter 90 value 2797.166074
## iter 100 value 2759.344480
## final value 2759.344480
## stopped after 100 iterations
## # weights: 221
## initial value 6366.721592
## iter 10 value 4570.578400
## iter 20 value 4568.727448
## iter 30 value 4568.705860
## iter 40 value 4567.882839
## iter 50 value 4280.551403
## iter 60 value 4263.690530
## iter 70 value 4245.982500
## iter 80 value 4218.781979
## iter 90 value 4183.613514
## iter 100 value 4172.457104
## final value 4172.457104
## stopped after 100 iterations
## # weights: 331
## initial value 4647.251441
## iter 10 value 4522.805021
## iter 20 value 4284.729106
## iter 30 value 4271.327422
## iter 40 value 4171.539247
## iter 50 value 4149.305180
## iter 60 value 4098.030030
## iter 70 value 3987.134813
## iter 80 value 3392.914200
## iter 90 value 3021.720380
## iter 100 value 2842.820202
## final value 2842.820202
## stopped after 100 iterations
## # weights: 551
## initial value 6961.113765
## iter 10 value 4557.862421
## iter 20 value 4321.559772
## iter 30 value 4256.577803
## iter 40 value 4250.227187
## iter 50 value 4203.143371
## iter 60 value 4190.127478
## iter 70 value 4156.395240
## iter 80 value 4141.806587
## iter 90 value 4115.675439
## iter 100 value 4078.390715
## final value 4078.390715
## stopped after 100 iterations
## # weights: 771
## initial value 7992.712593
## iter 10 value 4309.895954
## iter 20 value 4301.190930
## iter 30 value 4192.483397
## iter 40 value 4128.915952
## iter 50 value 3981.125111
## iter 60 value 3892.523630
## iter 70 value 3590.207267
## iter 80 value 3097.804414
## iter 90 value 2841.920229
## iter 100 value 2744.196107
## final value 2744.196107
## stopped after 100 iterations
## # weights: 221
## initial value 4640.114070
## final value 4569.807399
## converged
## # weights: 331
## initial value 5069.341441
## iter 10 value 4487.865259
## iter 20 value 4332.418787
## iter 30 value 4239.303014
## iter 40 value 4211.245525
## iter 50 value 4140.408790
## iter 60 value 4045.406700
## iter 70 value 3658.604491
## iter 80 value 3288.338797
## iter 90 value 3170.917240
## iter 100 value 2997.583241
## final value 2997.583241
## stopped after 100 iterations
## # weights: 551
## initial value 5183.847153
## iter 10 value 4233.939556
## iter 20 value 4192.271963
## iter 30 value 4187.700803
## iter 40 value 4175.733413
## iter 50 value 4106.817151
## iter 60 value 4087.720895
## iter 70 value 4052.588389
## iter 80 value 4048.488209
## iter 90 value 4034.973240
## iter 100 value 4030.309109
## final value 4030.309109
## stopped after 100 iterations
## # weights: 771
## initial value 4665.578155
## iter 10 value 4307.056700
## iter 20 value 4159.620362
## iter 30 value 4086.288189
## iter 40 value 4038.272742
## iter 50 value 3885.284368
## iter 60 value 3764.979099
## iter 70 value 3100.628094
## iter 80 value 2875.881165
## iter 90 value 2846.229541
## iter 100 value 2812.390481
## final value 2812.390481
## stopped after 100 iterations
## # weights: 221
## initial value 4778.442221
## iter 10 value 4570.089598
## iter 20 value 4569.933304
## iter 30 value 4555.907574
## iter 40 value 4216.092346
## iter 50 value 4194.083869
## iter 60 value 4191.840284
## iter 70 value 4118.863664
## iter 80 value 4104.237333
## iter 90 value 4064.801352
## iter 100 value 4057.071497
## final value 4057.071497
## stopped after 100 iterations
## # weights: 331
## initial value 5509.980494
## iter 10 value 4559.306481
## iter 20 value 4337.723878
## iter 30 value 4254.081828
## iter 40 value 4242.802331
## iter 50 value 4241.562972
## iter 60 value 4202.684285
## iter 70 value 4144.043383
## iter 80 value 4102.764457
## iter 90 value 4099.704042
## iter 100 value 4096.030821
## final value 4096.030821
## stopped after 100 iterations
## # weights: 551
## initial value 4635.285825
## iter 10 value 4562.714838
## iter 20 value 4380.020330
## iter 30 value 4190.040060
## iter 40 value 4173.987113
## iter 50 value 4166.483132
## iter 60 value 4148.395644
## iter 70 value 4106.657037
## iter 80 value 4101.331707
## iter 90 value 4097.576576
## iter 100 value 4090.784203
## final value 4090.784203
## stopped after 100 iterations
## # weights: 771
## initial value 5078.066171
## iter 10 value 4574.170414
## iter 20 value 4569.812619
## iter 30 value 4569.761524
## iter 40 value 4381.725728
## iter 50 value 4283.808273
## iter 60 value 4259.922901
## iter 70 value 4192.990334
## iter 80 value 4166.678791
## iter 90 value 4137.736091
## iter 100 value 4099.902179
## final value 4099.902179
## stopped after 100 iterations
## # weights: 221
## initial value 6009.391852
## iter 10 value 4571.607869
## iter 20 value 4570.011612
## iter 30 value 4569.993077
## iter 40 value 4527.317924
## iter 50 value 4525.762456
## iter 60 value 4489.082042
## iter 70 value 4203.365672
## iter 80 value 3991.638186
## iter 90 value 3681.202435
## iter 100 value 3409.735181
## final value 3409.735181
## stopped after 100 iterations
## # weights: 331
## initial value 4613.649812
## iter 10 value 4386.062000
## iter 20 value 4282.093479
## iter 30 value 4270.164528
## iter 40 value 4194.055903
## iter 50 value 4179.107129
## iter 60 value 4161.126023
## iter 70 value 4159.516876
## iter 80 value 4158.373158
## iter 90 value 4145.523208
## iter 100 value 4136.196285
## final value 4136.196285
## stopped after 100 iterations
## # weights: 551
## initial value 6295.985916
## iter 10 value 4440.185437
## iter 20 value 4267.261246
## iter 30 value 4197.364666
## iter 40 value 4141.885198
## iter 50 value 4094.081867
## iter 60 value 4083.495447
## iter 70 value 4077.722491
## iter 80 value 4070.236389
## iter 90 value 4061.222619
## iter 100 value 3534.441884
## final value 3534.441884
## stopped after 100 iterations
## # weights: 771
## initial value 4667.656022
## iter 10 value 4565.486794
## iter 20 value 4290.575974
## iter 30 value 4237.354483
## iter 40 value 4227.845563
## iter 50 value 4212.142660
## iter 60 value 4141.467539
## iter 70 value 3872.824228
## iter 80 value 3498.180300
## iter 90 value 3064.501726
## iter 100 value 2856.495736
## final value 2856.495736
## stopped after 100 iterations
## # weights: 221
## initial value 4698.607756
## iter 10 value 4406.308576
## iter 20 value 4211.841986
## iter 30 value 4211.655013
## iter 40 value 4192.403154
## iter 50 value 4188.511215
## iter 60 value 4185.243101
## iter 70 value 4178.461265
## iter 80 value 4175.060855
## iter 90 value 4155.171921
## iter 100 value 4138.429966
## final value 4138.429966
## stopped after 100 iterations
## # weights: 331
## initial value 8014.560724
## iter 10 value 4546.707616
## iter 20 value 4253.374461
## iter 30 value 4231.285785
## iter 40 value 3931.961484
## iter 50 value 3362.379177
## iter 60 value 3234.038990
## iter 70 value 3229.882617
## iter 80 value 3214.579244
## iter 90 value 2965.979276
## iter 100 value 2732.087678
## final value 2732.087678
## stopped after 100 iterations
## # weights: 551
## initial value 4953.359401
## iter 10 value 4538.647976
## iter 20 value 4294.711882
## iter 30 value 4242.406012
## iter 40 value 4225.773767
## iter 50 value 4135.443973
## iter 60 value 4074.293299
## iter 70 value 3669.157337
## iter 80 value 3395.093676
## iter 90 value 2993.992553
## iter 100 value 2833.594259
## final value 2833.594259
## stopped after 100 iterations
## # weights: 771
## initial value 5646.371007
## iter 10 value 4429.478202
## iter 20 value 4178.722222
## iter 30 value 4166.817466
## iter 40 value 4158.006758
## iter 50 value 4118.056968
## iter 60 value 4104.648116
## iter 70 value 4101.878868
## iter 80 value 4092.088413
## iter 90 value 4062.985441
## iter 100 value 4042.031117
## final value 4042.031117
## stopped after 100 iterations
## # weights: 221
## initial value 4589.182277
## iter 10 value 4570.299971
## iter 20 value 4569.819471
## final value 4569.813929
## converged
## # weights: 331
## initial value 6790.830581
## iter 10 value 4570.139023
## iter 20 value 4543.826172
## iter 30 value 4529.375436
## iter 40 value 4420.989020
## iter 50 value 4310.302905
## iter 60 value 4182.481629
## iter 70 value 4174.547304
## iter 80 value 4168.419220
## iter 90 value 4166.519044
## iter 100 value 4135.507660
## final value 4135.507660
## stopped after 100 iterations
## # weights: 551
## initial value 4893.147502
## iter 10 value 4292.326286
## iter 20 value 4232.171514
## iter 30 value 4197.910077
## iter 40 value 4055.491823
## iter 50 value 3619.391896
## iter 60 value 3379.569464
## iter 70 value 3319.076125
## iter 80 value 3303.709731
## iter 90 value 3299.973606
## iter 100 value 3240.567407
## final value 3240.567407
## stopped after 100 iterations
## # weights: 771
## initial value 6269.268926
## iter 10 value 4272.709599
## iter 20 value 4252.351864
## iter 30 value 4251.427537
## iter 40 value 4249.881747
## iter 50 value 4249.183112
## iter 60 value 4247.932007
## iter 70 value 4242.658866
## iter 80 value 4236.083840
## iter 90 value 4214.945598
## iter 100 value 4118.956732
## final value 4118.956732
## stopped after 100 iterations
## # weights: 221
## initial value 6909.261443
## iter 10 value 4485.028669
## iter 20 value 4213.513736
## iter 30 value 4195.938086
## iter 40 value 4181.768373
## iter 50 value 4171.487443
## iter 60 value 3972.668461
## iter 70 value 3919.144225
## iter 80 value 3758.446739
## iter 90 value 3509.861640
## iter 100 value 3354.980678
## final value 3354.980678
## stopped after 100 iterations
## # weights: 331
## initial value 5625.366223
## iter 10 value 4417.189585
## iter 20 value 4378.160473
## iter 30 value 4376.755342
## iter 40 value 4273.072173
## iter 50 value 4221.627318
## iter 60 value 4220.426157
## iter 70 value 4218.264869
## iter 80 value 4171.719218
## iter 90 value 4158.367043
## iter 100 value 4157.446969
## final value 4157.446969
## stopped after 100 iterations
## # weights: 551
## initial value 5401.119581
## iter 10 value 4570.105124
## iter 20 value 4339.874070
## iter 30 value 4271.528673
## iter 40 value 4204.559479
## iter 50 value 4193.714347
## iter 60 value 4187.933163
## iter 70 value 4172.991816
## iter 80 value 4156.185556
## iter 90 value 4154.172101
## iter 100 value 4150.877174
## final value 4150.877174
## stopped after 100 iterations
## # weights: 771
## initial value 5942.003919
## iter 10 value 4559.908531
## iter 20 value 4361.311167
## iter 30 value 4186.517182
## iter 40 value 4169.880477
## iter 50 value 4156.617883
## iter 60 value 4151.325613
## iter 70 value 4100.350445
## iter 80 value 4090.425728
## iter 90 value 4077.936707
## iter 100 value 3700.047178
## final value 3700.047178
## stopped after 100 iterations
## # weights: 771
## initial value 7578.739209
## iter 10 value 6811.245614
## iter 20 value 6507.229818
## iter 30 value 6368.908979
## iter 40 value 6339.983348
## iter 50 value 6336.392618
## iter 60 value 6309.494602
## iter 70 value 6295.369777
## iter 80 value 6245.754579
## iter 90 value 6242.493054
## iter 100 value 6233.395070
## final value 6233.395070
## stopped after 100 iterations
# Print the fitted object: resampling results per tuning combination and the
# selected size/decay.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0
## Neural Network
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7755, 7756, 7757
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7575204 0.2493056
## 2 0.5 0.7529617 0.1983997
## 2 0.7 0.7965491 0.4140765
## 3 0.3 0.8041987 0.4732673
## 3 0.5 0.7674922 0.3000316
## 3 0.7 0.7883731 0.3758278
## 5 0.3 0.8032500 0.4597113
## 5 0.5 0.7974020 0.4547382
## 5 0.7 0.7774628 0.3481146
## 7 0.3 0.7876913 0.3786681
## 7 0.5 0.7874285 0.3680633
## 7 0.7 0.8111514 0.4646650
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.7.
# Per-fold Accuracy and Kappa of the selected node-3 model.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8323878 0.5609659 Fold2
## 2 0.8329466 0.5769633 Fold1
## 3 0.7681197 0.2560658 Fold3
# Keep the first column of the resample table (Accuracy) as a one-column
# data frame for the fold-wise comparison.
# NOTE(review): the rows are stored as Fold2, Fold1, Fold3; the later
# subtraction appears to pair rows by position -- confirm the baseline
# resample table uses the same fold order.
ad_tda_kde_5.50.5_n3_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0[["resample"]][1]
# Summary of the fit (prints the final network's weights).
summary(Adult_TDA_KDE_5.50.5_n3_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 -0.02 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 -0.02 0.00 0.00 0.01
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 -0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.01 -0.01 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.01 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.04 0.00 -0.07 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.40 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 -0.01
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.01 0.00 0.00 0.00 0.06 -0.01 0.00 0.04
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 -0.03 0.00 0.00 0.00 -0.01 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.01 0.00 0.00 0.00 -0.01 0.00 0.01 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.03 -0.02 0.00 -0.01 -0.01
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.01 0.00 0.00 -0.01 0.00 0.00 -0.02 0.02
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.22 1.82 0.33 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.07 0.00 0.00 0.00 0.00 0.01 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -0.01 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 -0.01
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.02 0.97 -0.15 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.22 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.02 1.04 0.03 0.00 0.00 0.00 0.14 -0.11
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## -0.04 0.01 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 -0.10 0.21 0.00 -0.05
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## -0.14 0.00 0.00 0.09 0.22 0.29 0.00 -0.12
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 -0.16 0.00 0.00 0.03 0.19 0.00 -0.08
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## -0.09 0.00 -0.09 0.01 0.00 0.00 -0.03 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.09 0.00 0.00 -0.14 0.14 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.02 0.00 -0.08 0.00 0.00 0.10 0.16 -0.14
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## -0.43 0.00 0.72 0.02 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 -0.11 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.11 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## -0.02 -0.30 0.00 0.00 0.00 0.00 -0.02 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 -0.06 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 -0.02 -0.19 0.00 0.00 0.02
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 -0.04 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.01 0.00 0.00 0.00 0.00 0.00 -0.04 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.01 0.00 0.00 0.02 -0.04 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 -0.02 -0.04 0.02
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.35 0.00 0.17 0.01 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 -0.03 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## 0.41 0.44 0.55 1.13 0.11 -2.22 -0.02 0.51
# Variable-importance plot for the node-3 network (up to 50 features shown).
# Title typo fixed: "TDA-Assited" -> "TDA-Assisted".
vip(Adult_TDA_KDE_5.50.5_n3_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n3_NN1Fit TDA-Assisted NN")

# Score the held-out test set with the node-3 network.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels.
ad_tda_kde_5.50.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7168 1717
## >50K 248 635
##
## Accuracy : 0.7988
## 95% CI : (0.7907, 0.8067)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3007
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9666
## Specificity : 0.2700
## Pos Pred Value : 0.8068
## Neg Pred Value : 0.7191
## Prevalence : 0.7592
## Detection Rate : 0.7338
## Detection Prevalence : 0.9096
## Balanced Accuracy : 0.6183
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output is
# identical to the print directly above.
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7168 1717
## >50K 248 635
##
## Accuracy : 0.7988
## 95% CI : (0.7907, 0.8067)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3007
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9666
## Specificity : 0.2700
## Pos Pred Value : 0.8068
## Neg Pred Value : 0.7191
## Prevalence : 0.7592
## Detection Rate : 0.7338
## Detection Prevalence : 0.9096
## Balanced Accuracy : 0.6183
##
## 'Positive' Class : <=50K
##
# Overall statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_kde_5.50.5_n3_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.988329e-01 3.006549e-01 7.907434e-01 8.067438e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.234587e-21 1.718627e-240
# Test-set accuracy: the first entry of the overall statistics, named
# "Accuracy".
ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_kde_5.50.5_n3_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9665588 0.2699830 0.8067530
## Neg Pred Value Precision Recall
## 0.7191393 0.8067530 0.9665588
## F1 Prevalence Detection Rate
## 0.8794552 0.7592138 0.7338247
## Detection Prevalence Balanced Accuracy
## 0.9096028 0.6182709
# Precision, Recall and F1 are entries 5 to 7 of the per-class statistics.
ad_tda_kde_5.50.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted NN1 vs. TDA-assisted NN1 (node 3)
### 3-fold difference: baseline NN1 fold accuracies minus the TDA-assisted ones
diff_tda_kde_5.50.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n3_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n3_3_fold
## Accuracy
## 1 -0.003748713
## 2 0.013308465
## 3 0.038803194
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's "left" probability to its "right" probability.
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-3 3-fold differences;
# -0.01 and 0.01 are passed positionally after the data.
bsr_tda_kde_5.50.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3938333
##
## $winRight
## [1] 0.6061667
# Correlated Bayesian t-test on the node-3 3-fold accuracy differences.
# After the data, 0.1 and then -0.01 and 0.01 are passed positionally (the
# same interval that is used for the ROPE plot of these differences).
# NOTE(review): if the second argument is the between-fold correlation, the
# usual 1/k heuristic would give 1/3 for 3-fold CV rather than 0.1 -- confirm.
bct_tda_kde_5.50.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.1043999
##
## $rope
## [1] 0.2505368
##
## $right
## [1] 0.6450633
# ROPE plot of the node-3 3-fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nn1_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_kde_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
#bf_tda_kde_5.50.5_nn1.n3_3_fold
# One-sample t-test of the node-3 fold-wise differences (default null: mean 0).
t.test(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold)
## t = 1.3039, df = 2, p-value = 0.3222
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03707663 0.06931860
## sample estimates:
## mean of x
## 0.01612098
### Test-set accuracy difference (node 3)
# NOTE(review): the baseline here is `svm_cf_ov_acc`, whose name suggests an
# SVM test accuracy, whereas the 3-fold comparison for this model subtracts
# from the NN1 baseline `ad_nn1_fit_re` -- confirm that the SVM accuracy is
# the intended baseline.
diff_tda_kde_5.50.5_nn1.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n3_test
## Accuracy
## 0.02897215
## Bayesian tests on the test-set difference (node 3)
# Bayesian sign test. The input is the single test-set accuracy difference;
# -0.01 and 0.01 are passed positionally after the data.
bst_tda_kde_5.50.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's "left" probability to its "right" probability.
bst_tda_kde_5.50.5_nn1.n3_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n3_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n3_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference (node 3);
# -0.01 and 0.01 are passed positionally after the data.
bsr_tda_kde_5.50.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1585
##
## $winRight
## [1] 0.8415
# Correlated Bayesian t-test on the single test-set difference (node 3).
# Every component of the result prints as NA for this input.
# NOTE(review): presumably because no spread can be estimated from one
# value -- confirm whether this call should be kept.
bct_tda_kde_5.50.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n3_test))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n3_test)) #bf_tda_pca_5.50.5_nn1.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))
## Node 4
# Neural network 1: fit an 'nnet' model through train(), with adult_df1
# (converted to a factor) as the response and every other column of the
# node-4 data as a predictor. Resampling and the tuning grid come from the
# shared `fitControl` and `nn1Grid` objects; the model is selected on accuracy.
# NOTE(review): `Importance` looks like a leftover from a randomForest-style
# call; it is presumably forwarded through `...` and ignored by nnet --
# confirm it can be dropped.
# NOTE(review): several fits in the trace stop after 100 iterations without
# converging; consider raising `maxit` -- confirm.
Adult_TDA_KDE_5.50.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 5739.281512
## iter 10 value 3432.601305
## iter 20 value 3432.590216
## iter 30 value 3432.515145
## final value 3432.513480
## converged
## # weights: 331
## initial value 3450.518076
## iter 10 value 3341.946849
## iter 20 value 3226.640711
## iter 30 value 3194.340060
## iter 40 value 3165.005668
## iter 50 value 3151.477553
## iter 60 value 3143.784828
## iter 70 value 3126.374783
## iter 80 value 3084.365997
## iter 90 value 2675.818819
## iter 100 value 2260.386954
## final value 2260.386954
## stopped after 100 iterations
## # weights: 551
## initial value 3481.334792
## iter 10 value 3363.979301
## iter 20 value 3243.896940
## iter 30 value 3220.757619
## iter 40 value 3209.696549
## iter 50 value 3206.312986
## iter 60 value 3185.351161
## iter 70 value 3098.510879
## iter 80 value 2937.633414
## iter 90 value 2510.093019
## iter 100 value 2268.828289
## final value 2268.828289
## stopped after 100 iterations
## # weights: 771
## initial value 5133.083721
## iter 10 value 3412.979213
## iter 20 value 3213.986857
## iter 30 value 3117.205876
## iter 40 value 3096.386761
## iter 50 value 3015.328138
## iter 60 value 2894.762950
## iter 70 value 2534.378123
## iter 80 value 2181.018128
## iter 90 value 2072.737869
## iter 100 value 2064.979188
## final value 2064.979188
## stopped after 100 iterations
## # weights: 221
## initial value 5725.869290
## iter 10 value 3433.345928
## iter 20 value 3432.785068
## iter 30 value 3432.778596
## iter 30 value 3432.778569
## iter 30 value 3432.778563
## final value 3432.778563
## converged
## # weights: 331
## initial value 4586.983193
## iter 10 value 3386.663792
## iter 20 value 3204.888886
## iter 30 value 3191.651712
## iter 40 value 3190.459578
## iter 50 value 3188.602156
## iter 60 value 3187.215612
## iter 70 value 3187.027871
## final value 3187.025599
## converged
## # weights: 551
## initial value 5901.508141
## iter 10 value 3289.670713
## iter 20 value 3287.986466
## iter 30 value 3287.776842
## iter 40 value 3287.441148
## iter 50 value 3246.801746
## iter 60 value 3214.778119
## iter 70 value 3163.434840
## iter 80 value 3157.547776
## iter 90 value 3156.575501
## iter 100 value 3155.413311
## final value 3155.413311
## stopped after 100 iterations
## # weights: 771
## initial value 3799.109582
## iter 10 value 3222.883339
## iter 20 value 3196.882939
## iter 30 value 3182.818440
## iter 40 value 3172.497276
## iter 50 value 3152.453050
## iter 60 value 3147.608524
## iter 70 value 3144.441809
## iter 80 value 3093.903896
## iter 90 value 3000.990871
## iter 100 value 2814.517422
## final value 2814.517422
## stopped after 100 iterations
## # weights: 221
## initial value 6470.451530
## iter 10 value 3433.261482
## iter 20 value 3433.242319
## iter 30 value 3422.603661
## iter 40 value 3224.507871
## iter 50 value 3221.698690
## iter 60 value 3210.318176
## iter 70 value 3181.523810
## iter 80 value 3030.962010
## iter 90 value 2901.840716
## iter 100 value 2850.965347
## final value 2850.965347
## stopped after 100 iterations
## # weights: 331
## initial value 5649.842980
## iter 10 value 3416.712045
## iter 20 value 3196.424761
## iter 30 value 3187.145246
## iter 40 value 3168.144759
## iter 50 value 2907.364285
## iter 60 value 2714.574292
## iter 70 value 2570.658358
## iter 80 value 2505.827709
## iter 90 value 2462.142673
## iter 100 value 2421.461276
## final value 2421.461276
## stopped after 100 iterations
## # weights: 551
## initial value 4052.239231
## iter 10 value 3415.406693
## iter 20 value 3346.395549
## iter 30 value 3283.281501
## iter 40 value 2965.985151
## iter 50 value 2845.465094
## iter 60 value 2802.139758
## iter 70 value 2483.521063
## iter 80 value 2143.156437
## iter 90 value 2089.615078
## iter 100 value 2085.077917
## final value 2085.077917
## stopped after 100 iterations
## # weights: 771
## initial value 5918.571805
## iter 10 value 3459.592492
## iter 20 value 3398.022557
## iter 30 value 3204.875050
## iter 40 value 3194.308070
## iter 50 value 3188.657602
## iter 60 value 3166.729518
## iter 70 value 3157.614683
## iter 80 value 3123.678196
## iter 90 value 3110.377445
## iter 100 value 3106.612637
## final value 3106.612637
## stopped after 100 iterations
## # weights: 221
## initial value 4985.662268
## iter 10 value 3406.587356
## iter 20 value 3254.581342
## iter 30 value 3192.982901
## iter 40 value 3187.298890
## iter 50 value 3181.065538
## iter 60 value 3130.339159
## iter 70 value 3107.168343
## iter 80 value 2994.181793
## iter 90 value 2567.637557
## iter 100 value 2410.906581
## final value 2410.906581
## stopped after 100 iterations
## # weights: 331
## initial value 5251.543139
## iter 10 value 3198.793024
## iter 20 value 3171.813244
## iter 30 value 3168.376437
## iter 40 value 3157.281212
## iter 50 value 3144.167566
## iter 60 value 3113.045247
## iter 70 value 3094.291351
## iter 80 value 3086.168782
## iter 90 value 3069.147230
## iter 100 value 3057.601310
## final value 3057.601310
## stopped after 100 iterations
## # weights: 551
## initial value 4242.602357
## iter 10 value 3406.723963
## iter 20 value 3180.206248
## iter 30 value 3144.876508
## iter 40 value 3054.163376
## iter 50 value 2836.610838
## iter 60 value 2488.014532
## iter 70 value 2178.205320
## iter 80 value 2051.968034
## iter 90 value 2005.046786
## iter 100 value 1972.229363
## final value 1972.229363
## stopped after 100 iterations
## # weights: 771
## initial value 3705.296238
## iter 10 value 3220.692749
## iter 20 value 3197.029921
## iter 30 value 3132.419701
## iter 40 value 3001.678314
## iter 50 value 2835.038653
## iter 60 value 2747.318658
## iter 70 value 2277.520531
## iter 80 value 2125.005006
## iter 90 value 2038.188064
## iter 100 value 2022.907251
## final value 2022.907251
## stopped after 100 iterations
## # weights: 221
## initial value 3810.044011
## iter 10 value 3432.093392
## iter 20 value 3311.873578
## iter 30 value 3275.821821
## iter 40 value 3275.663747
## iter 50 value 3188.974361
## iter 60 value 3167.391231
## iter 70 value 3161.844708
## iter 80 value 3161.080743
## iter 90 value 3161.030751
## iter 90 value 3161.030747
## iter 90 value 3161.030747
## final value 3161.030747
## converged
## # weights: 331
## initial value 5119.457146
## iter 10 value 3407.890927
## iter 20 value 3213.500860
## iter 30 value 3153.476266
## iter 40 value 3142.115584
## iter 50 value 3137.662985
## iter 60 value 3134.534136
## iter 70 value 3127.325580
## iter 80 value 3081.814574
## iter 90 value 2851.313826
## iter 100 value 2778.127530
## final value 2778.127530
## stopped after 100 iterations
## # weights: 551
## initial value 3739.077690
## iter 10 value 3409.234620
## iter 20 value 3184.793828
## iter 30 value 3171.101876
## iter 40 value 3169.711353
## iter 50 value 3167.760372
## iter 60 value 3165.231782
## iter 70 value 3117.790536
## iter 80 value 3107.810737
## iter 90 value 3097.632866
## iter 100 value 3095.884500
## final value 3095.884500
## stopped after 100 iterations
## # weights: 771
## initial value 4130.144297
## iter 10 value 3316.485137
## iter 20 value 3219.276039
## iter 30 value 3181.471810
## iter 40 value 3181.150760
## iter 50 value 3178.835029
## iter 60 value 3170.331172
## iter 70 value 3167.270638
## iter 80 value 3164.012849
## iter 90 value 3153.558596
## iter 100 value 3152.959356
## final value 3152.959356
## stopped after 100 iterations
## # weights: 221
## initial value 6884.284514
## iter 10 value 3432.313883
## iter 20 value 3428.717757
## iter 30 value 3277.427953
## iter 40 value 3265.472836
## iter 50 value 3200.167927
## iter 60 value 3198.024588
## iter 70 value 3159.940962
## iter 80 value 3137.810555
## iter 90 value 3109.357525
## iter 100 value 3103.465767
## final value 3103.465767
## stopped after 100 iterations
## # weights: 331
## initial value 4829.081789
## iter 10 value 3301.675316
## iter 20 value 3190.153191
## iter 30 value 3175.838379
## iter 40 value 3167.331351
## iter 50 value 3163.593063
## iter 60 value 3162.692347
## iter 70 value 3160.778609
## iter 80 value 3149.475137
## iter 90 value 3129.427416
## iter 100 value 3114.338071
## final value 3114.338071
## stopped after 100 iterations
## # weights: 551
## initial value 3540.390953
## iter 10 value 3273.591338
## iter 20 value 3178.375256
## iter 30 value 3169.700267
## iter 40 value 3162.364890
## iter 50 value 3162.228570
## iter 60 value 3161.837130
## iter 70 value 3128.478032
## iter 80 value 3100.259942
## iter 90 value 3083.159651
## iter 100 value 3061.989222
## final value 3061.989222
## stopped after 100 iterations
## # weights: 771
## initial value 3584.019026
## iter 10 value 3412.165441
## iter 20 value 3409.367353
## iter 30 value 3397.211063
## iter 40 value 3299.058350
## iter 50 value 3238.218726
## iter 60 value 3195.961334
## iter 70 value 3156.391264
## iter 80 value 3117.297208
## iter 90 value 3071.980941
## iter 100 value 2936.549123
## final value 2936.549123
## stopped after 100 iterations
## # weights: 221
## initial value 4919.608577
## iter 10 value 3422.659695
## iter 20 value 3193.599264
## iter 30 value 3101.697611
## iter 40 value 2892.221606
## iter 50 value 2522.480328
## iter 60 value 2383.253066
## iter 70 value 2284.067196
## iter 80 value 2164.057123
## iter 90 value 2040.110581
## iter 100 value 1988.460685
## final value 1988.460685
## stopped after 100 iterations
## # weights: 331
## initial value 3482.789483
## iter 10 value 3423.011587
## iter 20 value 3213.542123
## iter 30 value 3186.712387
## iter 40 value 3166.732167
## iter 50 value 3077.651261
## iter 60 value 3049.055296
## iter 70 value 2806.979761
## iter 80 value 2425.944635
## iter 90 value 2202.129618
## iter 100 value 2170.358716
## final value 2170.358716
## stopped after 100 iterations
## # weights: 551
## initial value 7764.047808
## iter 10 value 3418.037478
## iter 20 value 3296.831403
## iter 30 value 3202.206498
## iter 40 value 3198.453406
## iter 50 value 3192.991746
## iter 60 value 3184.437752
## iter 70 value 3151.225245
## iter 80 value 3059.952375
## iter 90 value 3017.872404
## iter 100 value 2908.463666
## final value 2908.463666
## stopped after 100 iterations
## # weights: 771
## initial value 3533.219461
## iter 10 value 3359.850030
## iter 20 value 3216.537091
## iter 30 value 3204.256279
## iter 40 value 3154.016035
## iter 50 value 3148.502897
## iter 60 value 3143.103330
## iter 70 value 2915.842873
## iter 80 value 2445.549042
## iter 90 value 2331.979098
## iter 100 value 2290.704761
## final value 2290.704761
## stopped after 100 iterations
## # weights: 221
## initial value 5640.039318
## iter 10 value 3432.458738
## iter 20 value 3419.936974
## iter 30 value 3230.054366
## iter 40 value 3185.659763
## iter 50 value 3125.218967
## iter 60 value 2764.078395
## iter 70 value 2551.470777
## iter 80 value 2235.565626
## iter 90 value 2152.103686
## iter 100 value 2145.637166
## final value 2145.637166
## stopped after 100 iterations
## # weights: 331
## initial value 4334.585138
## iter 10 value 3380.778017
## iter 20 value 3230.986055
## iter 30 value 3158.903320
## iter 40 value 3133.427884
## iter 50 value 3075.523430
## iter 60 value 3017.364163
## iter 70 value 2872.511040
## iter 80 value 2557.026787
## iter 90 value 2354.912691
## iter 100 value 2281.255712
## final value 2281.255712
## stopped after 100 iterations
## # weights: 551
## initial value 4908.127613
## iter 10 value 3432.850919
## iter 20 value 3431.465120
## iter 30 value 3431.448588
## iter 40 value 3391.253860
## iter 50 value 3253.958662
## iter 60 value 3234.885305
## iter 70 value 3222.927534
## iter 80 value 3206.827815
## iter 90 value 3107.860947
## iter 100 value 2727.685412
## final value 2727.685412
## stopped after 100 iterations
## # weights: 771
## initial value 3908.111529
## iter 10 value 3309.312080
## iter 20 value 3295.153160
## iter 30 value 3213.355031
## iter 40 value 3192.874131
## iter 50 value 3187.021445
## iter 60 value 3133.583477
## iter 70 value 3067.226765
## iter 80 value 3055.803126
## iter 90 value 3048.566165
## iter 100 value 3034.216239
## final value 3034.216239
## stopped after 100 iterations
## # weights: 221
## initial value 4075.563457
## iter 10 value 3432.370516
## iter 20 value 3431.634426
## iter 30 value 3431.625910
## final value 3431.625839
## converged
## # weights: 331
## initial value 6386.033847
## iter 10 value 3425.441704
## iter 20 value 3357.838206
## iter 30 value 3230.540251
## iter 40 value 3113.085735
## iter 50 value 3006.388610
## iter 60 value 2847.370343
## iter 70 value 2550.919182
## iter 80 value 2383.551243
## iter 90 value 2278.314688
## iter 100 value 2150.715107
## final value 2150.715107
## stopped after 100 iterations
## # weights: 551
## initial value 3930.514926
## iter 10 value 3433.365069
## iter 20 value 3431.645895
## iter 30 value 3431.626042
## iter 40 value 3431.596593
## iter 50 value 3226.288461
## iter 60 value 3222.547706
## iter 70 value 3187.020199
## iter 80 value 3091.488115
## iter 90 value 2765.779829
## iter 100 value 2586.354693
## final value 2586.354693
## stopped after 100 iterations
## # weights: 771
## initial value 8343.579679
## iter 10 value 3431.918462
## iter 20 value 3430.843156
## iter 30 value 3429.632044
## iter 40 value 3398.898454
## iter 50 value 3146.853020
## iter 60 value 3067.095610
## iter 70 value 2931.631860
## iter 80 value 2837.089729
## iter 90 value 2491.017877
## iter 100 value 2295.855164
## final value 2295.855164
## stopped after 100 iterations
## # weights: 771
## initial value 5261.960526
## iter 10 value 5103.882260
## iter 20 value 4830.194492
## iter 30 value 4712.332379
## iter 40 value 4706.017582
## iter 50 value 4702.962005
## iter 60 value 4693.893457
## iter 70 value 4689.557466
## iter 80 value 4677.593978
## iter 90 value 4668.091074
## iter 100 value 4662.846200
## final value 4662.846200
## stopped after 100 iterations
# Print the resampled Accuracy/Kappa per size/decay combination and the
# tuning values chosen for the final model.
Adult_TDA_KDE_5.50.5_n4_NN1Fit0
## Neural Network
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6692, 6692, 6692
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8311417 0.3426364
## 2 0.5 0.8125125 0.2148859
## 2 0.7 0.8136083 0.1961233
## 3 0.3 0.8262602 0.3795918
## 3 0.5 0.8233712 0.3306511
## 3 0.7 0.8185894 0.2412877
## 5 0.3 0.8345288 0.4235930
## 5 0.5 0.8136083 0.3003756
## 5 0.7 0.8313409 0.3149499
## 7 0.3 0.8406057 0.4273529
## 7 0.5 0.8180913 0.2282293
## 7 0.7 0.8197848 0.2276777
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold Accuracy and Kappa for the selected tuning values.
Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8493724 0.5189879 Fold2
## 2 0.8547519 0.5402279 Fold1
## 3 0.8176928 0.2228429 Fold3
# Keep the per-fold accuracies as a one-column data frame for the paired
# comparisons that follow.
# NOTE(review): rows are stored in the order Fold2, Fold1, Fold3; the later
# subtraction appears to pair rows by position, so confirm the baseline
# accuracies use the same fold order.
ad_tda_kde_5.50.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample["Accuracy"]
# Print the fitted network's layout and weights.
summary(Adult_TDA_KDE_5.50.5_n4_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.01 0.24 0.00 0.00 0.00 0.00 0.02 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 -0.01 0.00 0.04
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 -0.01 0.08 0.00 0.00 0.02
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 -0.01 0.00 0.00 0.00 -0.01 0.00 0.05
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.01 0.00 0.00 0.00 0.00 0.00 -0.03 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.01 -0.01 0.03 0.00 0.00 0.00 -0.01
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.02 -0.01 0.02
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.06 0.00 0.29 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.06 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 -0.05 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.03 -0.22 0.02 0.00 0.00 0.00 -0.02 0.02
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.05 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.02 0.00 0.07
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 -0.12 -0.26 -0.01 0.00 0.02
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 -0.04 0.00 0.00 0.02 0.03 0.00 0.06
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.03 -0.04 0.00 0.00 0.01 0.00 -0.10 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.03 0.00 0.00 0.08 -0.01 0.00 -0.06 0.02
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.06 0.00 0.00 0.01 0.00 -0.03 -0.12 0.09
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.08 0.59 -0.35 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 -0.01 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 -0.02 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.72 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.17 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## -0.61 0.11 -0.04 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## -0.25 -0.30 0.00 -0.65 0.47 0.00 0.24 0.08
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## -0.13 -0.25 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 -0.05 0.43 0.00 -0.31
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 -0.31 -0.97 -0.17 0.00 0.41
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 -0.49 0.00 0.00 0.00 -0.20 0.00 -0.05
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.27 0.16 -0.02 0.00 -0.21 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## -0.02 -0.12 -0.05 0.62 -0.42 -0.05 -0.13 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## -0.27 0.00 -0.21 0.29 -0.12 -0.20 -0.38 0.13
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.01 0.01 0.03 0.06 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 -0.02 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 -0.29 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## -0.08 -1.17 1.44 -0.08 -0.08 0.02 -0.13 2.70
# Variable-importance plot for the n4 network, showing up to 50 predictors.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_KDE_5.50.5_n4_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n4_NN1Fit TDA-Assisted NN")

# Score the held-out rows with the tuned n4 network.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels and print the
# resulting confusion matrix with its summary statistics.
ad_tda_kde_5.50.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7341 1886
## >50K 75 466
##
## Accuracy : 0.7992
## 95% CI : (0.7912, 0.8071)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2551
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9899
## Specificity : 0.1981
## Pos Pred Value : 0.7956
## Neg Pred Value : 0.8614
## Prevalence : 0.7592
## Detection Rate : 0.7515
## Detection Prevalence : 0.9446
## Balanced Accuracy : 0.5940
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time; the
# output is identical to the print directly after the object is created.
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7341 1886
## >50K 75 466
##
## Accuracy : 0.7992
## 95% CI : (0.7912, 0.8071)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2551
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9899
## Specificity : 0.1981
## Pos Pred Value : 0.7956
## Neg Pred Value : 0.8614
## Prevalence : 0.7592
## Detection Rate : 0.7515
## Detection Prevalence : 0.9446
## Balanced Accuracy : 0.5940
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics: accuracy, kappa, accuracy bounds and p-values.
ad_tda_kde_5.50.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.992424e-01 2.550720e-01 7.911589e-01 8.071472e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.052017e-21 0.000000e+00
# Store the test-set accuracy (the "Accuracy" element of `$overall`) for the
# comparisons further down.
ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_kde_5.50.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9898867 0.1981293 0.7955999
## Neg Pred Value Precision Recall
## 0.8613678 0.7955999 0.9898867
## F1 Prevalence Detection Rate
## 0.8821727 0.7592138 0.7515356
## Detection Prevalence Balanced Accuracy
## 0.9446151 0.5940080
# Keep precision, recall and F1 of the n4 network.
ad_tda_kde_5.50.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy gap: `ad_nn1_fit_re` (presumably the plain NN baseline)
# minus the TDA-assisted n4 network.
diff_tda_kde_5.50.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n4_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n4_3_fold
## Accuracy
## 1 -0.020733269
## 2 -0.008496842
## 3 -0.010769893
## Bayesian Tests 3-fold diff
# Bayesian sign test on the three fold-wise differences
# (-0.01 / 0.01 are presumably the ROPE bounds - confirm against the helper).
bst_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf whenever probRight is 0, as in the result printed here.
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise differences, same bounds as the
# sign test.
bsr_tda_kde_5.50.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.6112667
##
## $winRope
## [1] 0.3887333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation; 0.1 is the usual value for 10-fold CV, while this model was
# resampled with 3 folds (1/3 under the 1/k heuristic) - confirm.
bct_tda_kde_5.50.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.7386676
##
## $rope
## [1] 0.2448903
##
## $right
## [1] 0.01644208
# ROPE plot of the n4 fold-wise differences, with the region of practical
# equivalence set to [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
#bf_tda_kde_5.50.5_nn1.n4_3_fold
# Frequentist counterpart: one-sample t-test of the differences against 0.
t.test(x = as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold)
## t = -3.5483, df = 2, p-value = 0.07106
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02950142 0.00283475
## sample estimates:
## mean of x
## -0.01333333
### Test set diff
# Held-out accuracy gap: baseline accuracy minus the TDA-assisted n4 network.
# NOTE(review): the baseline here is `svm_cf_ov_acc`, whereas the fold-wise
# comparison for this model subtracts from the NN baseline (`ad_nn1_fit_re`).
# Confirm that the SVM accuracy is the intended reference.
diff_tda_kde_5.50.5_nn1.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n4_test
## Accuracy
## 0.02856265
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference
# (-0.01 / 0.01 are presumably the ROPE bounds - confirm against the helper).
bst_tda_kde_5.50.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
bst_tda_kde_5.50.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n4_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n4_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, same bounds as the
# sign test.
bsr_tda_kde_5.50.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1583667
##
## $winRight
## [1] 0.8416333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single number and every component of the
# printed result is NA, so this call yields no usable estimate.
bct_tda_kde_5.50.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n4_test))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))
#bf_tda_kde_5.50.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))
##Node5
#Neural Network 1
# Tune and fit an `nnet` model on the n5 KDE feature set. The resampling
# scheme (`fitControl`) and the size/decay grid (`nn1Grid`) are defined
# outside this section; Accuracy is the selection metric.
# NOTE(review): `Importance` does not look like an nnet argument; it is
# presumably forwarded through `...` and ignored - confirm before removing.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  Importance = TRUE, # `T` is an ordinary, reassignable variable; use the literal
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4380.555309
## iter 10 value 2157.200772
## iter 20 value 2136.771432
## iter 30 value 2008.063300
## iter 40 value 1988.549132
## iter 50 value 1973.538255
## iter 60 value 1965.434521
## iter 70 value 1943.442816
## iter 80 value 1882.459591
## iter 90 value 1664.944270
## iter 100 value 1499.973222
## final value 1499.973222
## stopped after 100 iterations
## # weights: 331
## initial value 3514.825850
## iter 10 value 2089.025595
## iter 20 value 2008.803558
## iter 30 value 2008.340225
## iter 40 value 1998.802716
## iter 50 value 1993.711994
## iter 60 value 1991.508163
## iter 70 value 1990.224360
## iter 80 value 1989.967123
## final value 1989.966604
## converged
## # weights: 551
## initial value 4536.165854
## iter 10 value 2157.769272
## iter 20 value 2150.733742
## iter 30 value 2060.439589
## iter 40 value 2054.759531
## iter 50 value 2032.765884
## iter 60 value 1965.508907
## iter 70 value 1751.913072
## iter 80 value 1612.684795
## iter 90 value 1523.102111
## iter 100 value 1446.366786
## final value 1446.366786
## stopped after 100 iterations
## # weights: 771
## initial value 3404.225218
## iter 10 value 2019.700117
## iter 20 value 1952.667083
## iter 30 value 1745.113401
## iter 40 value 1610.809244
## iter 50 value 1402.996107
## iter 60 value 1364.651790
## iter 70 value 1352.904239
## iter 80 value 1342.568272
## iter 90 value 1338.522050
## iter 100 value 1335.495519
## final value 1335.495519
## stopped after 100 iterations
## # weights: 221
## initial value 2692.167823
## iter 10 value 2025.158023
## iter 20 value 2002.541997
## iter 30 value 1814.541565
## iter 40 value 1599.393312
## iter 50 value 1530.545797
## iter 60 value 1509.197151
## iter 70 value 1501.003902
## iter 80 value 1452.910845
## iter 90 value 1431.181091
## iter 100 value 1414.272088
## final value 1414.272088
## stopped after 100 iterations
## # weights: 331
## initial value 2619.528371
## iter 10 value 2003.386145
## iter 20 value 1971.191886
## iter 30 value 1909.400068
## iter 40 value 1866.290734
## iter 50 value 1696.020747
## iter 60 value 1519.047767
## iter 70 value 1417.451978
## iter 80 value 1386.135711
## iter 90 value 1370.909851
## iter 100 value 1366.375956
## final value 1366.375956
## stopped after 100 iterations
## # weights: 551
## initial value 2528.285958
## iter 10 value 2075.369509
## iter 20 value 2001.464099
## iter 30 value 1998.337990
## iter 40 value 1996.238058
## iter 50 value 1988.324874
## iter 60 value 1907.809262
## iter 70 value 1709.006098
## iter 80 value 1512.656891
## iter 90 value 1455.392647
## iter 100 value 1431.733736
## final value 1431.733736
## stopped after 100 iterations
## # weights: 771
## initial value 2701.350397
## iter 10 value 2161.190472
## iter 20 value 2158.590882
## iter 30 value 2158.558642
## iter 40 value 2112.977794
## iter 50 value 1938.192211
## iter 60 value 1586.783546
## iter 70 value 1513.163037
## iter 80 value 1457.374281
## iter 90 value 1441.160837
## iter 100 value 1411.404872
## final value 1411.404872
## stopped after 100 iterations
## # weights: 221
## initial value 2408.053046
## iter 10 value 2159.068957
## iter 20 value 2158.238885
## iter 30 value 2089.152637
## iter 40 value 2054.637114
## iter 50 value 2038.288170
## iter 60 value 1990.181040
## iter 70 value 1971.967624
## iter 80 value 1965.926715
## iter 90 value 1960.741795
## iter 100 value 1956.025554
## final value 1956.025554
## stopped after 100 iterations
## # weights: 331
## initial value 4361.152570
## iter 10 value 2158.565865
## iter 20 value 2043.305120
## iter 30 value 2009.069500
## iter 40 value 1999.464079
## iter 50 value 1909.627337
## iter 60 value 1750.289500
## iter 70 value 1475.424429
## iter 80 value 1453.086841
## iter 90 value 1451.259056
## iter 100 value 1402.255739
## final value 1402.255739
## stopped after 100 iterations
## # weights: 551
## initial value 5665.089415
## iter 10 value 2038.442414
## iter 20 value 2030.938404
## iter 30 value 2029.128702
## iter 40 value 2009.729056
## iter 50 value 2007.982530
## iter 60 value 2007.109723
## iter 70 value 2005.107031
## iter 80 value 1935.860991
## iter 90 value 1818.397813
## iter 100 value 1625.467296
## final value 1625.467296
## stopped after 100 iterations
## # weights: 771
## initial value 4000.436587
## iter 10 value 2035.955879
## iter 20 value 2022.358252
## iter 30 value 2008.790947
## iter 40 value 1991.878364
## iter 50 value 1987.821493
## iter 60 value 1932.153381
## iter 70 value 1824.230974
## iter 80 value 1699.334329
## iter 90 value 1572.655875
## iter 100 value 1498.425695
## final value 1498.425695
## stopped after 100 iterations
## # weights: 221
## initial value 2504.690137
## iter 10 value 2157.643913
## iter 20 value 2139.349481
## iter 30 value 2096.130398
## iter 40 value 2010.835067
## iter 50 value 1933.945638
## iter 60 value 1906.481855
## iter 70 value 1852.890897
## iter 80 value 1847.592656
## iter 90 value 1630.567064
## iter 100 value 1506.629972
## final value 1506.629972
## stopped after 100 iterations
## # weights: 331
## initial value 3471.197015
## iter 10 value 2078.498216
## iter 20 value 2003.338160
## iter 30 value 2000.348788
## iter 40 value 2000.328316
## final value 2000.328200
## converged
## # weights: 551
## initial value 3760.798736
## iter 10 value 2026.078061
## iter 20 value 1988.800931
## iter 30 value 1981.092025
## iter 40 value 1963.829479
## iter 50 value 1963.308508
## iter 60 value 1952.238986
## iter 70 value 1943.954518
## iter 80 value 1932.455433
## iter 90 value 1802.352825
## iter 100 value 1492.742762
## final value 1492.742762
## stopped after 100 iterations
## # weights: 771
## initial value 5637.712107
## iter 10 value 2031.692297
## iter 20 value 1979.474055
## iter 30 value 1954.483910
## iter 40 value 1918.608040
## iter 50 value 1864.462474
## iter 60 value 1798.900650
## iter 70 value 1592.764573
## iter 80 value 1452.235041
## iter 90 value 1397.859937
## iter 100 value 1386.782706
## final value 1386.782706
## stopped after 100 iterations
## # weights: 221
## initial value 3398.111972
## iter 10 value 2030.918059
## iter 20 value 2013.828491
## iter 30 value 2005.991306
## iter 40 value 1976.604078
## iter 50 value 1831.421867
## iter 60 value 1515.106640
## iter 70 value 1474.370091
## iter 80 value 1419.717761
## iter 90 value 1399.861306
## iter 100 value 1361.397061
## final value 1361.397061
## stopped after 100 iterations
## # weights: 331
## initial value 2739.331892
## iter 10 value 2140.093965
## iter 20 value 2064.592327
## iter 30 value 2057.197058
## iter 40 value 2018.011101
## iter 50 value 1988.375691
## iter 60 value 1983.456966
## iter 70 value 1982.982158
## iter 80 value 1979.731602
## iter 90 value 1893.861236
## iter 100 value 1744.766959
## final value 1744.766959
## stopped after 100 iterations
## # weights: 551
## initial value 3737.702977
## iter 10 value 2147.153698
## iter 20 value 2034.668818
## iter 30 value 2019.939108
## iter 40 value 2018.555230
## iter 50 value 1991.592019
## iter 60 value 1911.890654
## iter 70 value 1797.716304
## iter 80 value 1768.944402
## iter 90 value 1659.816705
## iter 100 value 1516.902187
## final value 1516.902187
## stopped after 100 iterations
## # weights: 771
## initial value 4869.290838
## iter 10 value 2149.252094
## iter 20 value 2141.312975
## iter 30 value 2141.188052
## iter 40 value 2012.984186
## iter 50 value 1994.607218
## iter 60 value 1988.611143
## iter 70 value 1982.341249
## iter 80 value 1968.062930
## iter 90 value 1962.695092
## iter 100 value 1949.801118
## final value 1949.801118
## stopped after 100 iterations
## # weights: 221
## initial value 4417.828990
## iter 10 value 2162.923309
## iter 20 value 2160.507500
## iter 30 value 2160.463759
## iter 40 value 2036.944225
## iter 50 value 2002.387159
## iter 60 value 1938.472402
## iter 70 value 1834.617569
## iter 80 value 1735.712937
## iter 90 value 1534.652755
## iter 100 value 1443.666918
## final value 1443.666918
## stopped after 100 iterations
## # weights: 331
## initial value 5770.462788
## iter 10 value 2156.318761
## iter 20 value 2152.603018
## iter 30 value 2026.959523
## iter 40 value 1755.754664
## iter 50 value 1668.813315
## iter 60 value 1431.635754
## iter 70 value 1396.968555
## iter 80 value 1389.392470
## iter 90 value 1382.678508
## iter 100 value 1376.545022
## final value 1376.545022
## stopped after 100 iterations
## # weights: 551
## initial value 3929.810502
## iter 10 value 2159.430694
## iter 20 value 2158.077747
## iter 30 value 2158.062149
## final value 2158.062033
## converged
## # weights: 771
## initial value 2640.382834
## iter 10 value 2087.471045
## iter 20 value 2005.490969
## iter 30 value 1992.216901
## iter 40 value 1990.064644
## iter 50 value 1980.781323
## iter 60 value 1966.444964
## iter 70 value 1787.320069
## iter 80 value 1595.363469
## iter 90 value 1515.326714
## iter 100 value 1483.300397
## final value 1483.300397
## stopped after 100 iterations
## # weights: 221
## initial value 3333.132016
## iter 10 value 2025.840440
## iter 20 value 2021.560910
## iter 30 value 2019.575744
## iter 40 value 2017.087846
## iter 50 value 2009.711444
## iter 60 value 1984.735809
## iter 70 value 1973.863716
## iter 80 value 1954.179000
## iter 90 value 1946.336430
## iter 100 value 1694.432985
## final value 1694.432985
## stopped after 100 iterations
## # weights: 331
## initial value 2641.981451
## iter 10 value 2159.690080
## iter 20 value 2158.217901
## iter 30 value 2108.428371
## iter 40 value 2036.789978
## iter 50 value 2030.136373
## iter 60 value 2005.012307
## iter 70 value 1850.870902
## iter 80 value 1651.702846
## iter 90 value 1506.914267
## iter 100 value 1487.734658
## final value 1487.734658
## stopped after 100 iterations
## # weights: 551
## initial value 2631.571977
## iter 10 value 2066.877330
## iter 20 value 2009.657783
## iter 30 value 1990.181462
## iter 40 value 1885.814984
## iter 50 value 1686.955939
## iter 60 value 1586.066984
## iter 70 value 1444.701000
## iter 80 value 1418.707296
## iter 90 value 1418.074196
## iter 100 value 1396.709796
## final value 1396.709796
## stopped after 100 iterations
## # weights: 771
## initial value 10049.454894
## iter 10 value 2145.880098
## iter 20 value 2057.682078
## iter 30 value 1982.481328
## iter 40 value 1948.522216
## iter 50 value 1862.203402
## iter 60 value 1858.655962
## iter 70 value 1740.297559
## iter 80 value 1507.627273
## iter 90 value 1478.804216
## iter 100 value 1464.176905
## final value 1464.176905
## stopped after 100 iterations
## # weights: 221
## initial value 4413.458192
## iter 10 value 2161.364644
## iter 20 value 2159.844151
## iter 30 value 2159.739362
## final value 2159.739223
## converged
## # weights: 331
## initial value 3963.492731
## iter 10 value 2037.691119
## iter 20 value 2012.514770
## iter 30 value 1992.766271
## iter 40 value 1937.271315
## iter 50 value 1724.787835
## iter 60 value 1580.636685
## iter 70 value 1492.128061
## iter 80 value 1468.181366
## iter 90 value 1426.053434
## iter 100 value 1389.949247
## final value 1389.949247
## stopped after 100 iterations
## # weights: 551
## initial value 6250.991290
## iter 10 value 2090.701735
## iter 20 value 2013.977159
## iter 30 value 1997.215547
## iter 40 value 1990.539971
## iter 50 value 1985.862750
## iter 60 value 1980.338522
## iter 70 value 1975.648474
## iter 80 value 1972.441812
## iter 90 value 1971.573603
## iter 100 value 1971.407792
## final value 1971.407792
## stopped after 100 iterations
## # weights: 771
## initial value 4164.337088
## iter 10 value 2059.286435
## iter 20 value 1998.593424
## iter 30 value 1507.246475
## iter 40 value 1418.297931
## iter 50 value 1380.504695
## iter 60 value 1361.812422
## iter 70 value 1353.939485
## iter 80 value 1348.997327
## iter 90 value 1342.885919
## iter 100 value 1333.149179
## final value 1333.149179
## stopped after 100 iterations
## # weights: 221
## initial value 6012.600004
## iter 10 value 2160.271632
## iter 20 value 2141.458587
## iter 30 value 2044.511163
## iter 40 value 1962.413351
## iter 50 value 1669.366623
## iter 60 value 1572.982507
## iter 70 value 1525.050207
## iter 80 value 1432.397493
## iter 90 value 1392.762683
## iter 100 value 1380.619870
## final value 1380.619870
## stopped after 100 iterations
## # weights: 331
## initial value 2409.581373
## iter 10 value 2035.056273
## iter 20 value 2019.657824
## iter 30 value 2019.145037
## iter 40 value 2018.992402
## final value 2018.990567
## converged
## # weights: 551
## initial value 4443.038008
## iter 10 value 2164.832428
## iter 20 value 2160.327999
## iter 30 value 2157.576348
## iter 40 value 2041.009165
## iter 50 value 2036.444743
## iter 60 value 2032.571473
## iter 70 value 2031.850003
## iter 80 value 2022.871134
## iter 90 value 2020.151944
## iter 100 value 2019.944731
## final value 2019.944731
## stopped after 100 iterations
## # weights: 771
## initial value 7803.890149
## iter 10 value 2057.420033
## iter 20 value 2045.146798
## iter 30 value 2015.976940
## iter 40 value 1990.015911
## iter 50 value 1986.657525
## iter 60 value 1981.740359
## iter 70 value 1973.218389
## iter 80 value 1942.504964
## iter 90 value 1863.992888
## iter 100 value 1830.428985
## final value 1830.428985
## stopped after 100 iterations
## # weights: 771
## initial value 4796.789636
## iter 10 value 3217.733477
## iter 20 value 3028.559525
## iter 30 value 2956.919815
## iter 40 value 2819.903214
## iter 50 value 2670.976447
## iter 60 value 2496.590839
## iter 70 value 2481.830564
## iter 80 value 2423.205114
## iter 90 value 2185.694959
## iter 100 value 2072.161532
## final value 2072.161532
## stopped after 100 iterations
# Print the resampling results for every size/decay combination and the
# combination selected for the final Node 5 NN1 model.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0
## Neural Network
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5027, 5026, 5027
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8641914 0.1982213
## 2 0.5 0.8623328 0.2523514
## 2 0.7 0.8652527 0.2552265
## 3 0.3 0.8622020 0.1884833
## 3 0.5 0.8668446 0.2925104
## 3 0.7 0.8664450 0.3367942
## 5 0.3 0.8659154 0.2781519
## 5 0.5 0.8635282 0.2920675
## 5 0.7 0.8578265 0.1491520
## 7 0.3 0.8680374 0.3510028
## 7 0.5 0.8680380 0.3352310
## 7 0.7 0.8571630 0.2546601
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
# Per-fold Accuracy and Kappa for the selected tuning parameters.
Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8611774 0.1749223 Fold2
## 2 0.8647035 0.4110519 Fold1
## 3 0.8782332 0.4197190 Fold3
# Keep only the first column of the per-fold table (Accuracy) as a one-column
# data frame for the fold-wise comparison against the baseline.
# NOTE(review): rows stay in the order the resample table reports them
# (Fold2, Fold1, Fold3 in the recorded output); confirm the baseline's
# per-fold table uses the same order before subtracting row by row.
ad_tda_kde_5.50.5_n5_nn1_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_NN1Fit0[["resample"]][1]
# Show the final network's structure and fitted weights.
summary(Adult_TDA_KDE_5.50.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -0.36 0.10 -0.76 1.46 -0.24 -0.05 -0.17 0.44
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.73 -0.23 -0.09 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 1.30 0.00 0.00 -1.51
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 -0.15 -0.79 -0.69 0.68 1.68
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.08 -1.00 -0.92 -0.03 -0.80 -0.02 -0.09 -0.12
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.68 -1.18 -1.01 -0.29 -0.96 -0.31 1.67 0.65
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.22 1.78 -0.57 0.30 -0.25 -0.64 -1.20 -0.58
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 2.01 -1.37 1.02 0.35 -0.96 0.61 -0.57 0.21
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.06 0.16 -0.11 -0.16 0.66 -0.42
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.74 -0.47 0.31 -0.61 0.01 -0.03 0.17 -0.09
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.35 -0.11 0.00 -0.08 -0.29 -0.07 -0.16 0.11
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.05 -0.24 0.89 -0.47 -0.27 -0.57 -0.30 -0.06
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.14 0.80 -0.29 0.05 -0.50 -0.19 0.08 -0.16
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.05 -0.22 0.62 0.21 0.45
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.05 0.05 -0.03 0.19 0.04 0.00 -0.27 0.53
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.31 -0.10 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.07 0.00 0.00 0.42
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 -0.44 0.17 -0.85 -0.09 0.04
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.01 1.11 -0.13 -0.04 -0.02 0.61 0.01 -0.73
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.16 0.11 -0.22 0.35 0.18 0.07 -0.38 -0.31
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.20 -0.01 0.04 0.06 -0.45 0.11 0.75 -0.42
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 -0.06 0.11 -0.29 0.04 0.25 0.54 -0.49
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.08 0.14 -0.01 0.08 0.00 -0.01
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## -0.09 0.00 0.04 0.03 0.05 0.00 0.03 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.07 0.02 0.00 0.00 -0.01 0.00 -0.02 0.03
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## -0.01 0.08 -0.06 -0.02 -0.01 -0.28 0.00 0.01
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.11 0.04 0.00 0.14 -0.01 -0.01 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## -0.01 0.00 -0.14 0.01 0.03
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## -0.01 -0.15 -0.01 -0.01 -0.05 0.00 -0.11 0.09
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.04 0.03 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 -0.03 0.00 0.00 -0.16
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.17 -0.02 -0.10 0.00 0.11
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.01 -0.03 0.00 -0.01 0.02 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.01 0.00 -0.08 0.03 0.10 0.00 -0.07 -0.03
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.09 0.03 -0.12 -0.09 -0.13 -0.01 -0.05 0.05
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.22 -0.05 0.08 0.00 0.00 -0.04 0.23 -0.24
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## -0.01 -0.04 0.44 0.11 0.00 -0.01 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.11 0.00 0.00 -0.07 0.02 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.01 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 -0.16 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## -0.52 4.52 -0.04 -0.48 -1.75 -0.38 -0.48 -0.88
# Variable-importance plot for the Node 5 TDA-assisted NN1 model, limited to
# the top 50 features. The plot title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.50.5_n5_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_NN1Fit TDA-Assisted NN")

# Score the held-out test data with the Node 5 TDA-assisted NN1 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels to assess
# performance on the test data, then print the result.
ad_tda_kde_5.50.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6864 1660
## >50K 552 692
##
## Accuracy : 0.7735
## 95% CI : (0.7651, 0.7818)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0004467
##
## Kappa : 0.2619
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9256
## Specificity : 0.2942
## Pos Pred Value : 0.8053
## Neg Pred Value : 0.5563
## Prevalence : 0.7592
## Detection Rate : 0.7027
## Detection Prevalence : 0.8726
## Balanced Accuracy : 0.6099
##
## 'Positive' Class : <=50K
##
# Prints the same confusion matrix again; the object has not changed since it
# was created and first printed.
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6864 1660
## >50K 552 692
##
## Accuracy : 0.7735
## 95% CI : (0.7651, 0.7818)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0004467
##
## Kappa : 0.2619
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9256
## Specificity : 0.2942
## Pos Pred Value : 0.8053
## Neg Pred Value : 0.5563
## Prevalence : 0.7592
## Detection Rate : 0.7027
## Detection Prevalence : 0.8726
## Balanced Accuracy : 0.6099
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (Accuracy, Kappa, accuracy
# bounds and p-values) as a named numeric vector.
ad_tda_kde_5.50.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.735463e-01 2.619098e-01 7.651142e-01 7.818150e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.466916e-04 1.697207e-122
# Store the test-set accuracy: the first element of the overall statistics,
# which keeps its "Accuracy" name.
ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_nn1_cf0[["overall"]][1]
# Print the per-class statistics (sensitivity, specificity, precision, ...).
ad_tda_kde_5.50.5_n5_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9255663 0.2942177 0.8052557
## Neg Pred Value Precision Recall
## 0.5562701 0.8052557 0.9255663
## F1 Prevalence Detection Rate
## 0.8612296 0.7592138 0.7027027
## Detection Prevalence Balanced Accuracy
## 0.8726454 0.6098920
# Elements 5 to 7 of the per-class statistics are Precision, Recall and F1.
ad_tda_kde_5.50.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 classifiers (Node 5)
### 3-fold diff
# Row-wise difference of per-fold accuracies: baseline (ad_nn1_fit_re) minus
# the Node 5 TDA-assisted NN1 fit. Negative values mean the TDA-assisted
# model scored higher on that row.
diff_tda_kde_5.50.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.50.5_n5_nn1_fit_re
diff_tda_kde_5.50.5_nn1_n5_3_fold
## Accuracy
## 1 -0.03253829
## 2 -0.01844844
## 3 -0.07131031
## Bayesian Tests 3-fold diff
# Bayesian sign test on the Node 5 3-fold differences; the trailing arguments
# (-0.01, 0.01) are presumably the ROPE bounds -- the result reports
# probLeft / probRope / probRight.
bst_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight). The ratio is Inf whenever probRight is 0,
# as in the recorded output.
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the Node 5 3-fold differences, with the same
# (-0.01, 0.01) bounds as the sign test; reports winLeft / winRope / winRight.
bsr_tda_kde_5.50.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9645
##
## $winRope
## [1] 0.0355
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the Node 5 3-fold differences. The second
# argument (0.1) is presumably the assumed correlation between folds --
# TODO confirm; the last two are the (-0.01, 0.01) bounds used throughout.
bct_tda_kde_5.50.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.8830625
##
## $rope
## [1] 0.06264481
##
## $right
## [1] 0.05429269
# ROPE plot for the Node 5 3-fold accuracy differences, using the same
# [-0.01, 0.01] interval as the Bayesian tests.
plot(
  rope(diff_tda_kde_5.50.5_nn1_n5_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (disabled):
# bf_tda_kde_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
# bf_tda_kde_5.50.5_nn1.n5_3_fold

# Frequentist one-sample t-test of the Node 5 3-fold accuracy differences
# against a mean of zero.
t.test(
  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold)
## t = -2.5793, df = 2, p-value = 0.1231
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.10876765 0.02723628
## sample estimates:
## mean of x
## -0.04076568
### Test set diff
# Single test-set accuracy gap: baseline accuracy minus the Node 5
# TDA-assisted NN1 accuracy (a positive value means the baseline scored
# higher).
# NOTE(review): the baseline here is svm_cf_ov_acc, whereas the 3-fold
# comparison in this section uses ad_nn1_fit_re -- confirm the SVM accuracy
# is the intended baseline for an NN1 comparison.
diff_tda_kde_5.50.5_nn1.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc
diff_tda_kde_5.50.5_nn1.n5_test
## Accuracy
## 0.0542588
## Bayesian Tests Test set diff
# Bayesian sign test on the Node 5 test-set difference, with the
# (-0.01, 0.01) bounds used throughout; reports probLeft / probRope /
# probRight.
bst_tda_kde_5.50.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight).
bst_tda_kde_5.50.5_nn1.n5_test_odds.left <-
  bst_tda_kde_5.50.5_nn1.n5_test[["probLeft"]] /
  bst_tda_kde_5.50.5_nn1.n5_test[["probRight"]]
bst_tda_kde_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the Node 5 test-set difference, with the same
# (-0.01, 0.01) bounds as the sign test. This statement previously re-ran the
# test on the n4 difference and overwrote bsr_tda_kde_5.50.5_nn1.n4_test, so
# Node 5 was never tested here.
# NOTE(review): the recorded output that follows was produced by the old n4
# call; re-knit to refresh it.
bsr_tda_kde_5.50.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1550667
##
## $winRight
## [1] 0.8449333
# Bayesian correlated t-test on the Node 5 test-set difference. The second
# argument (0.1) is presumably the assumed correlation -- TODO confirm.
# The recorded result is NA for left/rope/right; the input here is a single
# difference value.
bct_tda_kde_5.50.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n5_test))
#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n5_test))
#bf_tda_kde_5.50.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test))
## Logistic Regression
# Baseline (non-TDA) model: train() with method "glm" and a binomial family on
# the full one-hot training frame, resampled via fitControl, metric Accuracy.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampling summary of the fitted model
adultLrFit
## Generalized Linear Model
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results:
##
## Accuracy Kappa
## 0.8500417 0.5639457
# Per-fold resampling results of the baseline model
adultLrFit$resample
## Accuracy Kappa Resample
## 1 0.8511254 0.5647397 Fold1
## 2 0.8494341 0.5602067 Fold2
## 3 0.8495657 0.5668908 Fold3
# Keep only the per-fold Accuracy column (the first column of $resample) as a
# one-column data frame for the fold-wise comparisons further down.
ad_lr_fit_re <- adultLrFit$resample["Accuracy"]
summary(adultLrFit)
##
## Call:
## NULL
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.684e+12 3.378e+12 0.794 0.427012
## V1 2.493e-02 1.973e-03 12.639 < 2e-16 ***
## V2.. -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Federal.gov -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Local.gov -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Never.worked -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Private -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Self.emp.inc -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Self.emp.not.inc -2.684e+12 3.378e+12 -0.794 0.427012
## V2.State.gov -2.684e+12 3.378e+12 -0.794 0.427012
## V2.Without.pay -2.684e+12 3.378e+12 -0.794 0.427012
## V3 4.279e-07 2.065e-07 2.072 0.038232 *
## V4.10th -1.141e+00 1.892e-01 -6.032 1.62e-09 ***
## V4.11th -1.118e+00 1.822e-01 -6.137 8.41e-10 ***
## V4.12th -7.823e-01 2.673e-01 -2.927 0.003426 **
## V4.1st.4th -1.690e+00 5.344e-01 -3.162 0.001568 **
## V4.5th.6th -1.812e+00 3.874e-01 -4.678 2.90e-06 ***
## V4.7th.8th -1.769e+00 2.324e-01 -7.613 2.68e-14 ***
## V4.9th -1.323e+00 2.456e-01 -5.388 7.12e-08 ***
## V4.Assoc.acdm 8.067e-02 1.155e-01 0.698 0.484945
## V4.Assoc.voc 2.085e-01 1.025e-01 2.035 0.041860 *
## V4.Bachelors 6.892e-01 6.664e-02 10.343 < 2e-16 ***
## V4.Doctorate 1.930e+00 1.933e-01 9.981 < 2e-16 ***
## V4.HS.grad -3.922e-01 5.994e-02 -6.543 6.03e-11 ***
## V4.Masters 1.038e+00 9.727e-02 10.669 < 2e-16 ***
## V4.Preschool -3.140e+01 3.838e+04 -0.001 0.999347
## V4.Prof.school 1.615e+00 1.577e-01 10.237 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 9.411e-02 1.947e-01 0.483 0.628925
## V6.Married.AF.spouse 2.864e+00 6.256e-01 4.578 4.68e-06 ***
## V6.Married.civ.spouse 2.435e+00 3.708e-01 6.568 5.10e-11 ***
## V6.Married.spouse.absent 3.207e-02 3.186e-01 0.101 0.919825
## V6.Never.married -4.464e-01 2.026e-01 -2.203 0.027592 *
## V6.Separated -9.802e-03 2.589e-01 -0.038 0.969796
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -1.271e-02 1.203e-01 -0.106 0.915873
## V7.Armed.Forces -8.642e-01 1.684e+00 -0.513 0.607920
## V7.Craft.repair 1.696e-01 1.010e-01 1.679 0.093107 .
## V7.Exec.managerial 8.454e-01 1.047e-01 8.074 6.80e-16 ***
## V7.Farming.fishing -9.421e-01 1.702e-01 -5.535 3.11e-08 ***
## V7.Handlers.cleaners -6.985e-01 1.756e-01 -3.978 6.95e-05 ***
## V7.Machine.op.inspct -1.907e-01 1.281e-01 -1.489 0.136533
## V7.Other.service -7.682e-01 1.474e-01 -5.213 1.86e-07 ***
## V7.Priv.house.serv -3.825e+00 1.641e+00 -2.331 0.019736 *
## V7.Prof.specialty 5.982e-01 1.120e-01 5.343 9.13e-08 ***
## V7.Protective.serv 6.015e-01 1.567e-01 3.839 0.000124 ***
## V7.Sales 2.929e-01 1.080e-01 2.712 0.006679 **
## V7.Tech.support 7.197e-01 1.422e-01 5.063 4.13e-07 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.425e+00 1.238e-01 -11.510 < 2e-16 ***
## V8.Not.in.family -6.970e-01 3.375e-01 -2.065 0.038906 *
## V8.Other.relative -1.913e+00 3.100e-01 -6.171 6.80e-10 ***
## V8.Own.child -2.032e+00 3.295e-01 -6.166 7.00e-10 ***
## V8.Unmarried -8.062e-01 3.481e-01 -2.316 0.020566 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -6.055e-01 2.765e-01 -2.190 0.028544 *
## V9.Asian.Pac.Islander 4.559e-02 1.810e-01 0.252 0.801159
## V9.Black -1.676e-01 9.204e-02 -1.821 0.068598 .
## V9.Other -3.852e-01 3.374e-01 -1.142 0.253648
## V9.White NA NA NA NA
## V10.Female -8.607e-01 9.598e-02 -8.967 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.158e-04 1.256e-05 25.152 < 2e-16 ***
## V12 6.765e-04 4.501e-05 15.029 < 2e-16 ***
## V13 3.112e-02 1.952e-03 15.942 < 2e-16 ***
## V14.. -1.885e+00 1.053e+00 -1.791 0.073256 .
## V14.Cambodia 2.758e-01 1.350e+00 0.204 0.838087
## V14.Canada -1.634e+00 1.093e+00 -1.495 0.134823
## V14.China -2.613e+00 1.131e+00 -2.310 0.020907 *
## V14.Columbia -3.697e+00 1.349e+00 -2.742 0.006115 **
## V14.Cuba -1.336e+00 1.104e+00 -1.211 0.226028
## V14.Dominican.Republic -3.222e+00 1.483e+00 -2.173 0.029789 *
## V14.Ecuador -1.724e+00 1.278e+00 -1.349 0.177185
## V14.El.Salvador -2.675e+00 1.205e+00 -2.221 0.026351 *
## V14.England -1.564e+00 1.090e+00 -1.435 0.151364
## V14.France -1.132e+00 1.196e+00 -0.946 0.344143
## V14.Germany -1.175e+00 1.084e+00 -1.084 0.278365
## V14.Greece -2.642e+00 1.230e+00 -2.149 0.031668 *
## V14.Guatemala -2.477e+00 1.422e+00 -1.742 0.081480 .
## V14.Haiti -2.242e+00 1.396e+00 -1.606 0.108362
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.969e+00 2.845e+00 -1.044 0.296702
## V14.Hong -2.262e+00 1.578e+00 -1.434 0.151697
## V14.Hungary -2.349e+00 1.386e+00 -1.694 0.090285 .
## V14.India -1.652e+00 1.109e+00 -1.490 0.136334
## V14.Iran -1.307e+00 1.159e+00 -1.128 0.259404
## V14.Ireland -9.512e-01 1.240e+00 -0.767 0.443153
## V14.Italy -7.117e-01 1.104e+00 -0.645 0.518997
## V14.Jamaica -1.563e+00 1.164e+00 -1.342 0.179615
## V14.Japan -1.364e+00 1.138e+00 -1.199 0.230650
## V14.Laos -2.871e+00 1.565e+00 -1.834 0.066688 .
## V14.Mexico -2.193e+00 1.071e+00 -2.047 0.040629 *
## V14.Nicaragua -2.310e+00 1.522e+00 -1.518 0.129041
## V14.Outlying.US.Guam.USVI.etc. -2.601e+01 8.470e+04 0.000 0.999755
## V14.Peru -2.723e+00 1.500e+00 -1.816 0.069372 .
## V14.Philippines -9.092e-01 1.080e+00 -0.842 0.399974
## V14.Poland -1.759e+00 1.133e+00 -1.552 0.120553
## V14.Portugal -1.543e+00 1.223e+00 -1.261 0.207218
## V14.Puerto.Rico -2.205e+00 1.122e+00 -1.965 0.049435 *
## V14.Scotland -1.943e+00 1.414e+00 -1.374 0.169322
## V14.South -2.295e+00 1.150e+00 -1.996 0.045984 *
## V14.Taiwan -1.875e+00 1.193e+00 -1.572 0.115858
## V14.Thailand -2.214e+00 1.362e+00 -1.625 0.104123
## V14.Trinadad.Tobago -9.988e-01 1.443e+00 -0.692 0.488746
## V14.United.States -1.550e+00 1.039e+00 -1.492 0.135814
## V14.Vietnam -3.188e+00 1.322e+00 -2.411 0.015894 *
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25165 on 22792 degrees of freedom
## Residual deviance: 14361 on 22694 degrees of freedom
## AIC: 14559
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the baseline model (25 features)
vip(adultLrFit, num_features = 25) + ggtitle("non-TDA-Assisted LR")

# Score the held-out test frame with the baseline model
predictions <- predict(adultLrFit, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the test labels
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6893 917
## >50K 523 1435
##
## Accuracy : 0.8526
## 95% CI : (0.8454, 0.8596)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5723
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9295
## Specificity : 0.6101
## Pos Pred Value : 0.8826
## Neg Pred Value : 0.7329
## Prevalence : 0.7592
## Detection Rate : 0.7057
## Detection Prevalence : 0.7995
## Balanced Accuracy : 0.7698
##
## 'Positive' Class : <=50K
##
# Overall statistics of the baseline confusion matrix
lr_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.525799e-01 5.723299e-01 8.453930e-01 8.595557e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.445581e-115 3.911751e-25
# Test-set accuracy (first element of $overall), kept as a named value
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
# Per-class statistics
lr_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9294768 0.6101190 0.8825864
## Neg Pred Value Precision Recall
## 0.7328907 0.8825864 0.9294768
## F1 Prevalence Detection Rate
## 0.9054249 0.7592138 0.7056716
## Detection Prevalence Balanced Accuracy
## 0.7995495 0.7697979
# Precision, Recall and F1 are elements 5 to 7 of $byClass
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Logistic regression on the node-1 data (tda.m_adult_5.50.5.n1.vec), fitted
# through train() with method "glm", resampled via fitControl, metric Accuracy.
# A direct glm() fit used to be assigned to this same name just before this
# call; it was overwritten here without ever being used, so it was removed.
Adult_TDA_PC_5.50.5_n1_LrFit0 <- train(as.factor(adult_df1) ~ .,
                                       data = tda.m_adult_5.50.5.n1.vec,
                                       family = 'binomial',
                                       method = 'glm',
                                       trControl = fitControl,
                                       metric = 'Accuracy')
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampling summary of the fitted model
Adult_TDA_PC_5.50.5_n1_LrFit0
## Generalized Linear Model
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3279, 3278, 3277
## Resampling results:
##
## Accuracy Kappa
## 0.857161 0.03390148
# Per-fold resampling results of the node-1 model
Adult_TDA_PC_5.50.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.6300366 0.073586480 Fold1
## 2 0.9719341 -0.002339865 Fold2
## 3 0.9695122 0.030457813 Fold3
# Keep only the per-fold Accuracy column (the first column of $resample)
ad_tda_pc_5.50.5_n1_lr_fit_re <- Adult_TDA_PC_5.50.5_n1_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (25 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.754e+15 1.035e+08 65242260 <2e-16 ***
## V1 -4.021e+13 1.001e+05 -401739559 <2e-16 ***
## V2.. 1.555e+15 1.144e+07 135880986 <2e-16 ***
## V2.Federal.gov -8.210e+14 6.396e+06 -128368422 <2e-16 ***
## V2.Local.gov -3.322e+14 5.648e+06 -58813598 <2e-16 ***
## V2.Never.worked NA NA NA NA
## V2.Private 2.257e+14 4.636e+06 48689222 <2e-16 ***
## V2.Self.emp.inc -1.030e+15 5.363e+06 -192126826 <2e-16 ***
## V2.Self.emp.not.inc -1.764e+15 5.342e+06 -330201149 <2e-16 ***
## V2.State.gov NA NA NA NA
## V2.Without.pay NA NA NA NA
## V3 1.936e+08 9.807e+00 19741148 <2e-16 ***
## V4.10th 5.343e+14 1.892e+07 28232875 <2e-16 ***
## V4.11th 1.643e+15 2.266e+07 72471752 <2e-16 ***
## V4.12th -5.781e+14 3.020e+07 -19138755 <2e-16 ***
## V4.1st.4th 1.762e+15 6.954e+07 25333567 <2e-16 ***
## V4.5th.6th 1.178e+15 3.942e+07 29884200 <2e-16 ***
## V4.7th.8th 1.870e+15 1.548e+07 120788873 <2e-16 ***
## V4.9th 1.995e+15 3.027e+07 65929241 <2e-16 ***
## V4.Assoc.acdm -6.403e+14 5.949e+06 -107617549 <2e-16 ***
## V4.Assoc.voc -4.419e+14 5.274e+06 -83784354 <2e-16 ***
## V4.Bachelors -9.810e+14 3.195e+06 -307070713 <2e-16 ***
## V4.Doctorate -2.241e+15 5.430e+06 -412621450 <2e-16 ***
## V4.HS.grad 1.710e+14 3.533e+06 48411646 <2e-16 ***
## V4.Masters -1.096e+15 3.832e+06 -286104624 <2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school -1.241e+15 4.949e+06 -250795563 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 2.246e+15 6.983e+07 32166187 <2e-16 ***
## V6.Married.AF.spouse 5.415e+15 1.067e+08 50765144 <2e-16 ***
## V6.Married.civ.spouse 3.561e+15 9.547e+07 37296901 <2e-16 ***
## V6.Married.spouse.absent -3.913e+14 9.503e+07 -4116981 <2e-16 ***
## V6.Never.married -1.548e+14 7.763e+07 -1993868 <2e-16 ***
## V6.Separated NA NA NA NA
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -8.947e+13 8.646e+06 -10348260 <2e-16 ***
## V7.Armed.Forces 1.572e+15 6.759e+07 23257677 <2e-16 ***
## V7.Craft.repair -3.669e+12 5.511e+06 -665768 <2e-16 ***
## V7.Exec.managerial 3.655e+14 5.294e+06 69045202 <2e-16 ***
## V7.Farming.fishing -5.738e+14 8.252e+06 -69535707 <2e-16 ***
## V7.Handlers.cleaners 5.386e+14 1.867e+07 28841951 <2e-16 ***
## V7.Machine.op.inspct 7.351e+14 1.057e+07 69529076 <2e-16 ***
## V7.Other.service 2.137e+15 1.938e+07 110275000 <2e-16 ***
## V7.Priv.house.serv NA NA NA NA
## V7.Prof.specialty 3.170e+14 5.589e+06 56730038 <2e-16 ***
## V7.Protective.serv -5.345e+14 7.672e+06 -69670508 <2e-16 ***
## V7.Sales 8.977e+14 5.694e+06 157643421 <2e-16 ***
## V7.Tech.support 7.911e+14 7.616e+06 103877755 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.433e+15 1.969e+07 -72760359 <2e-16 ***
## V8.Not.in.family 1.555e+15 7.050e+07 22057608 <2e-16 ***
## V8.Other.relative -4.701e+13 7.015e+07 -670114 <2e-16 ***
## V8.Own.child NA NA NA NA
## V8.Unmarried -5.367e+14 8.790e+07 -6106524 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 8.136e+14 2.250e+07 36160676 <2e-16 ***
## V9.Asian.Pac.Islander -1.535e+15 9.355e+06 -164127666 <2e-16 ***
## V9.Black -1.255e+15 7.568e+06 -165859104 <2e-16 ***
## V9.Other 1.048e+15 2.296e+07 45649335 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female NA NA NA NA
## V10.Male NA NA NA NA
## V11 1.515e+10 6.103e+01 248215727 <2e-16 ***
## V12 1.548e+11 1.518e+03 101924136 <2e-16 ***
## V13 -4.157e+13 8.947e+04 -464636898 <2e-16 ***
## V14.. -2.127e+14 3.449e+07 -6166348 <2e-16 ***
## V14.Cambodia 1.804e+15 5.903e+07 30563828 <2e-16 ***
## V14.Canada -9.754e+14 3.623e+07 -26924014 <2e-16 ***
## V14.China 3.139e+14 3.899e+07 8050228 <2e-16 ***
## V14.Columbia 2.155e+15 7.535e+07 28607405 <2e-16 ***
## V14.Cuba -1.569e+15 3.936e+07 -39865728 <2e-16 ***
## V14.Dominican.Republic NA NA NA NA
## V14.Ecuador 2.000e+14 5.830e+07 3431112 <2e-16 ***
## V14.El.Salvador 5.352e+14 4.521e+07 11838315 <2e-16 ***
## V14.England 2.845e+13 3.794e+07 749833 <2e-16 ***
## V14.France -3.013e+14 4.125e+07 -7303826 <2e-16 ***
## V14.Germany -3.903e+14 3.639e+07 -10723816 <2e-16 ***
## V14.Greece -3.609e+15 4.359e+07 -82783695 <2e-16 ***
## V14.Guatemala NA NA NA NA
## V14.Haiti NA NA NA NA
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras NA NA NA NA
## V14.Hong 2.628e+14 4.616e+07 5693624 <2e-16 ***
## V14.Hungary 6.213e+14 5.836e+07 10646011 <2e-16 ***
## V14.India 3.249e+13 3.665e+07 886349 <2e-16 ***
## V14.Iran -3.496e+15 3.831e+07 -91256665 <2e-16 ***
## V14.Ireland -2.987e+15 5.141e+07 -58110019 <2e-16 ***
## V14.Italy 3.034e+13 3.807e+07 797040 <2e-16 ***
## V14.Jamaica 2.904e+15 7.572e+07 38349803 <2e-16 ***
## V14.Japan 5.594e+14 3.867e+07 14465190 <2e-16 ***
## V14.Laos 2.427e+15 7.625e+07 31826490 <2e-16 ***
## V14.Mexico 1.998e+14 3.917e+07 5100435 <2e-16 ***
## V14.Nicaragua NA NA NA NA
## V14.Outlying.US.Guam.USVI.etc. NA NA NA NA
## V14.Peru -1.239e+15 7.521e+07 -16478711 <2e-16 ***
## V14.Philippines -1.491e+15 3.695e+07 -40349667 <2e-16 ***
## V14.Poland -2.029e+14 4.348e+07 -4666562 <2e-16 ***
## V14.Portugal 1.810e+13 5.836e+07 310228 <2e-16 ***
## V14.Puerto.Rico -5.417e+14 4.800e+07 -11284445 <2e-16 ***
## V14.Scotland 2.444e+15 7.521e+07 32494881 <2e-16 ***
## V14.South -2.945e+14 4.099e+07 -7184583 <2e-16 ***
## V14.Taiwan 1.731e+15 3.897e+07 44411006 <2e-16 ***
## V14.Thailand 1.398e+13 5.903e+07 236749 <2e-16 ***
## V14.Trinadad.Tobago NA NA NA NA
## V14.United.States -8.790e+14 3.373e+07 -26058180 <2e-16 ***
## V14.Vietnam 3.345e+15 7.597e+07 44023473 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1208.3 on 4916 degrees of freedom
## Residual deviance: 9587.6 on 4833 degrees of freedom
## AIC: 9755.6
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 model (50 features).
# The plot title previously read "TDA-Assited"; the spelling is corrected.
vip(Adult_TDA_PC_5.50.5_n1_LrFit0,50) + ggtitle("Adult_TDA_PCA_5.50.5_n1_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.50.5_n1_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 47 13
## >50K 7369 2339
##
## Accuracy : 0.2443
## 95% CI : (0.2358, 0.2529)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 4e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.006338
## Specificity : 0.994473
## Pos Pred Value : 0.783333
## Neg Pred Value : 0.240935
## Prevalence : 0.759214
## Detection Rate : 0.004812
## Detection Prevalence : 0.006143
## Balanced Accuracy : 0.500405
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time (it is
# already printed right after it is created); the repeat can likely be dropped.
ad_tda_pc_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 47 13
## >50K 7369 2339
##
## Accuracy : 0.2443
## 95% CI : (0.2358, 0.2529)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 4e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.006338
## Specificity : 0.994473
## Pos Pred Value : 0.783333
## Neg Pred Value : 0.240935
## Prevalence : 0.759214
## Detection Rate : 0.004812
## Detection Prevalence : 0.006143
## Balanced Accuracy : 0.500405
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-1 confusion matrix
ad_tda_pc_5.50.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2442669943 0.0003919283 0.2357726063 0.2529141141 0.7592137592
## AccuracyPValue McnemarPValue
## 1.0000000000 0.0000000000
# Test-set accuracy (first element of $overall), kept as a named value
ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.50.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.006337648 0.994472789 0.783333333
## Neg Pred Value Precision Recall
## 0.240935311 0.783333333 0.006337648
## F1 Prevalence Detection Rate
## 0.012573569 0.759213759 0.004811630
## Detection Prevalence Balanced Accuracy
## 0.006142506 0.500405219
# Precision, Recall and F1 are elements 5 to 7 of $byClass
ad_tda_pc_5.50.5_n1_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy of the baseline LR minus that of the node-1 TDA-assisted LR.
# NOTE(review): the two models were resampled on different data (22793 vs 4917
# samples per the printed fits), so "Fold k" of one is not the same split as
# "Fold k" of the other -- confirm the pairing assumed by the tests below.
diff_tda_pca_5.50.5_lr_n1_3_fold <- ad_lr_fit_re - ad_tda_pc_5.50.5_n1_lr_fit_re
diff_tda_pca_5.50.5_lr_n1_3_fold
## Accuracy
## 1 0.2210888
## 2 -0.1225000
## 3 -0.1199465
## Bayesian Tests 3-fold diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left
## [1] 2
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_pca_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.5395667
##
## $winRope
## [1] 0.01743333
##
## $winRight
## [1] 0.443
# Bayesian Correlated Test on the 3-fold differences, ROPE [-0.01, 0.01]
bct_tda_pca_5.50.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n1_3_fold
## $left
## [1] 0.4922709
##
## $rope
## [1] 0.05347321
##
## $right
## [1] 0.4542559
# ROPE plot over the same region
plot(
  rope(diff_tda_pca_5.50.5_lr_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
#bf_tda_pca_5.50.5_lr.n1_3_fold
# One-sample t-test on the same differences
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold)
## t = -0.062391, df = 2, p-value = 0.9559
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.4980795 0.4838410
## sample estimates:
## mean of x
## -0.00711925
### Test set diff
# Test-set accuracy of the baseline LR minus that of the node-1 TDA-assisted LR
diff_tda_pca_5.50.5_lr.n1_test <- lr_cf_ov_acc - ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n1_test
## Accuracy
## 0.6083129
## Bayesian Tests Test set diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_pca_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.50.5_lr.n1_test_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_pca_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1572667
##
## $winRight
## [1] 0.8427333
# Bayesian Correlated Test
# NOTE(review): the input is the single test-set difference printed earlier in
# this section, and the recorded result is NA for left/rope/right -- presumably
# because the test needs more than one observation. Confirm before reporting it.
bct_tda_pca_5.50.5_lr.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# The remaining checks are disabled for the test-set difference.
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n1_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
#bf_tda_pca_5.50.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
## With TDA PCA filter 5 intervals, 50% overlap, 5 bins
## Node2
# Logistic regression on the node-2 data (tda.m_adult_5.50.5.n2.vec), fitted
# through train() with method "glm", resampled via fitControl, metric Accuracy.
Adult_TDA_PC_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampling summary of the fitted model
Adult_TDA_PC_5.50.5_n2_LrFit0
## Generalized Linear Model
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8136, 8138
## Resampling results:
##
## Accuracy Kappa
## 0.7168597 0.4317041
# Per-fold resampling results of the node-2 model
Adult_TDA_PC_5.50.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.7227139 0.4433708 Fold1
## 2 0.7221130 0.4431247 Fold2
## 3 0.7057522 0.4086167 Fold3
# Keep only the per-fold Accuracy column (the first column of $resample)
ad_tda_pc_5.50.5_n2_lr_fit_re <- Adult_TDA_PC_5.50.5_n2_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.967e+13 2.870e+13 -6.850e-01 0.493065
## V1 1.336e-02 2.046e-03 6.528e+00 6.67e-11 ***
## V2.. -4.308e+12 8.550e+12 -5.040e-01 0.614376
## V2.Federal.gov -4.308e+12 8.551e+12 -5.040e-01 0.614396
## V2.Local.gov -4.308e+12 8.551e+12 -5.040e-01 0.614388
## V2.Never.worked NA NA NA NA
## V2.Private -4.308e+12 8.551e+12 -5.040e-01 0.614408
## V2.Self.emp.inc -4.308e+12 8.551e+12 -5.040e-01 0.614386
## V2.Self.emp.not.inc -4.308e+12 8.551e+12 -5.040e-01 0.614388
## V2.State.gov -4.308e+12 8.550e+12 -5.040e-01 0.614372
## V2.Without.pay -4.308e+12 8.551e+12 -5.040e-01 0.614382
## V3 1.019e-06 2.175e-07 4.687e+00 2.77e-06 ***
## V4.10th -6.537e-01 1.826e-01 -3.579e+00 0.000345 ***
## V4.11th -7.294e-01 2.044e-01 -3.568e+00 0.000359 ***
## V4.12th -2.521e-01 2.832e-01 -8.900e-01 0.373348
## V4.1st.4th -5.050e-01 5.586e-01 -9.040e-01 0.365952
## V4.5th.6th -5.680e-01 3.741e-01 -1.518e+00 0.128967
## V4.7th.8th -1.287e+00 2.030e-01 -6.339e+00 2.31e-10 ***
## V4.9th -1.013e+00 2.999e-01 -3.376e+00 0.000734 ***
## V4.Assoc.acdm 5.309e-03 1.222e-01 4.300e-02 0.965361
## V4.Assoc.voc -4.145e-02 1.021e-01 -4.060e-01 0.684913
## V4.Bachelors 5.747e-01 6.873e-02 8.361e+00 < 2e-16 ***
## V4.Doctorate 1.132e+00 1.918e-01 5.905e+00 3.53e-09 ***
## V4.HS.grad -3.084e-01 6.013e-02 -5.129e+00 2.91e-07 ***
## V4.Masters 9.349e-01 1.021e-01 9.157e+00 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 1.073e+00 1.699e-01 6.317e+00 2.67e-10 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.AF.spouse 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.civ.spouse 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Married.spouse.absent 4.528e+15 3.454e+13 1.311e+02 < 2e-16 ***
## V6.Never.married 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Separated 2.398e+13 3.454e+13 6.940e-01 0.487623
## V6.Widowed 2.398e+13 3.454e+13 6.940e-01 0.487623
## V7.. NA NA NA NA
## V7.Adm.clerical 6.669e-01 1.361e-01 4.899e+00 9.63e-07 ***
## V7.Armed.Forces -5.528e-01 1.732e+00 -3.190e-01 0.749587
## V7.Craft.repair 1.326e-01 9.167e-02 1.447e+00 0.148034
## V7.Exec.managerial 9.516e-01 9.742e-02 9.768e+00 < 2e-16 ***
## V7.Farming.fishing -6.079e-01 1.485e-01 -4.092e+00 4.27e-05 ***
## V7.Handlers.cleaners 6.106e-02 1.781e-01 3.430e-01 0.731742
## V7.Machine.op.inspct 1.666e-01 1.202e-01 1.386e+00 0.165808
## V7.Other.service 1.136e-02 1.794e-01 6.300e-02 0.949500
## V7.Priv.house.serv -2.811e+01 3.621e+05 0.000e+00 0.999938
## V7.Prof.specialty 6.883e-01 1.073e-01 6.417e+00 1.39e-10 ***
## V7.Protective.serv 6.582e-01 1.474e-01 4.466e+00 7.97e-06 ***
## V7.Sales 4.691e-01 1.005e-01 4.669e+00 3.03e-06 ***
## V7.Tech.support 9.348e-01 1.447e-01 6.462e+00 1.03e-10 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband 4.404e-01 1.426e+00 3.090e-01 0.757500
## V8.Not.in.family 1.221e+00 1.601e+00 7.630e-01 0.445618
## V8.Other.relative 7.811e-01 1.527e+00 5.120e-01 0.608889
## V8.Own.child 1.533e+00 1.591e+00 9.630e-01 0.335406
## V8.Unmarried 2.370e+01 3.638e+04 1.000e-03 0.999480
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -4.932e-01 3.265e-01 -1.511e+00 0.130889
## V9.Asian.Pac.Islander 2.948e-01 2.032e-01 1.450e+00 0.146939
## V9.Black 9.881e-01 1.415e-01 6.985e+00 2.84e-12 ***
## V9.Other 4.578e-01 4.690e-01 9.760e-01 0.329059
## V9.White NA NA NA NA
## V10.Female 4.686e+00 1.511e+00 3.101e+00 0.001930 **
## V10.Male NA NA NA NA
## V11 2.798e-04 1.449e-05 1.931e+01 < 2e-16 ***
## V12 5.656e-04 4.669e-05 1.211e+01 < 2e-16 ***
## V13 2.005e-02 2.014e-03 9.957e+00 < 2e-16 ***
## V14.. -3.265e-01 6.869e-01 -4.750e-01 0.634603
## V14.Cambodia 1.581e+00 1.115e+00 1.418e+00 0.156332
## V14.Canada 3.195e-01 7.366e-01 4.340e-01 0.664429
## V14.China -1.030e+00 8.133e-01 -1.267e+00 0.205153
## V14.Columbia -2.191e+00 1.156e+00 -1.895e+00 0.058028 .
## V14.Cuba 5.253e-01 7.717e-01 6.810e-01 0.496104
## V14.Dominican.Republic -2.697e+01 2.279e+05 0.000e+00 0.999906
## V14.Ecuador -2.249e-01 1.043e+00 -2.160e-01 0.829290
## V14.El.Salvador -3.510e-01 9.132e-01 -3.840e-01 0.700685
## V14.England 2.505e-01 7.825e-01 3.200e-01 0.748876
## V14.France 5.230e-01 1.010e+00 5.180e-01 0.604632
## V14.Germany 4.033e-01 7.343e-01 5.490e-01 0.582832
## V14.Greece -1.370e+00 9.195e-01 -1.490e+00 0.136305
## V14.Guatemala -1.322e+00 1.963e+00 -6.740e-01 0.500601
## V14.Haiti 1.385e-01 1.430e+00 9.700e-02 0.922871
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 4.504e+15 6.711e+07 6.711e+07 < 2e-16 ***
## V14.Hong -1.354e-01 1.065e+00 -1.270e-01 0.898818
## V14.Hungary -3.015e-02 1.194e+00 -2.500e-02 0.979847
## V14.India -9.204e-01 7.529e-01 -1.222e+00 0.221573
## V14.Iran -6.423e-02 8.333e-01 -7.700e-02 0.938563
## V14.Ireland 1.402e+00 1.331e+00 1.054e+00 0.291937
## V14.Italy 1.437e-01 7.628e-01 1.880e-01 0.850565
## V14.Jamaica -2.473e-01 9.478e-01 -2.610e-01 0.794146
## V14.Japan -4.180e-01 8.248e-01 -5.070e-01 0.612321
## V14.Laos 2.603e+01 4.776e+05 0.000e+00 0.999957
## V14.Mexico 1.362e-01 7.326e-01 1.860e-01 0.852530
## V14.Nicaragua -1.396e+00 1.467e+00 -9.520e-01 0.341303
## V14.Outlying.US.Guam.USVI.etc. -2.492e+01 2.745e+05 0.000e+00 0.999928
## V14.Peru 2.271e-01 1.315e+00 1.730e-01 0.862897
## V14.Philippines 5.721e-01 7.622e-01 7.510e-01 0.452892
## V14.Poland -1.261e-01 8.095e-01 -1.560e-01 0.876255
## V14.Portugal -3.059e-01 1.145e+00 -2.670e-01 0.789288
## V14.Puerto.Rico -4.806e-01 9.040e-01 -5.320e-01 0.594987
## V14.Scotland 7.373e-01 1.418e+00 5.200e-01 0.603005
## V14.South -9.358e-01 8.345e-01 -1.121e+00 0.262128
## V14.Taiwan -5.569e-01 8.562e-01 -6.500e-01 0.515408
## V14.Thailand -6.617e-02 1.450e+00 -4.600e-02 0.963608
## V14.Trinadad.Tobago 2.728e+01 2.628e+05 0.000e+00 0.999917
## V14.United.States 9.008e-02 6.663e-01 1.350e-01 0.892453
## V14.Vietnam -1.255e+00 1.123e+00 -1.118e+00 0.263501
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16823 on 12205 degrees of freedom
## Residual deviance: 12903 on 12108 degrees of freedom
## AIC: 13099
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 model (50 features).
# The plot title previously read "TDA-Assited"; the spelling is corrected.
vip(Adult_TDA_PC_5.50.5_n2_LrFit0,50) + ggtitle("Adult_TDA_PCA_5.50.5_n2_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.50.5_n2_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1723 509
## >50K 5693 1843
##
## Accuracy : 0.3651
## 95% CI : (0.3555, 0.3747)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0091
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2323
## Specificity : 0.7836
## Pos Pred Value : 0.7720
## Neg Pred Value : 0.2446
## Prevalence : 0.7592
## Detection Rate : 0.1764
## Detection Prevalence : 0.2285
## Balanced Accuracy : 0.5080
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time (it is
# already printed right after it is created); the repeat can likely be dropped.
ad_tda_pc_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1723 509
## >50K 5693 1843
##
## Accuracy : 0.3651
## 95% CI : (0.3555, 0.3747)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0091
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2323
## Specificity : 0.7836
## Pos Pred Value : 0.7720
## Neg Pred Value : 0.2446
## Prevalence : 0.7592
## Detection Rate : 0.1764
## Detection Prevalence : 0.2285
## Balanced Accuracy : 0.5080
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix
ad_tda_pc_5.50.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.36506962 0.00908627 0.35551219 0.37470756 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Test-set accuracy (first element of $overall), kept as a named value
ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.50.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2323355 0.7835884 0.7719534
## Neg Pred Value Precision Recall
## 0.2445594 0.7719534 0.2323355
## F1 Prevalence Detection Rate
## 0.3571725 0.7592138 0.1763923
## Detection Prevalence Balanced Accuracy
## 0.2285012 0.5079620
# Precision, Recall and F1 are elements 5 to 7 of $byClass
ad_tda_pc_5.50.5_n2_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy of the baseline LR minus that of the node-2 TDA-assisted LR.
# NOTE(review): the two models were resampled on different data (22793 vs 12206
# samples per the printed fits), so "Fold k" of one is not the same split as
# "Fold k" of the other -- confirm the pairing assumed by the tests below.
diff_tda_pca_5.50.5_lr_n2_3_fold <- ad_lr_fit_re - ad_tda_pc_5.50.5_n2_lr_fit_re
diff_tda_pca_5.50.5_lr_n2_3_fold
## Accuracy
## 1 0.1284116
## 2 0.1273210
## 3 0.1438135
## Bayesian Tests 3-fold diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_pca_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009833333
##
## $winRight
## [1] 0.9901667
# Bayesian Correlated Test on the 3-fold differences, ROPE [-0.01, 0.01]
bct_tda_pca_5.50.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n2_3_fold
## $left
## [1] 0.0009195529
##
## $rope
## [1] 0.000321639
##
## $right
## [1] 0.9987588
# ROPE plot over the same region
plot(
  rope(diff_tda_pca_5.50.5_lr_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
#bf_tda_pca_5.50.5_lr.n2_3_fold
# One-sample t-test on the same differences
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold)
## t = 25.011, df = 2, p-value = 0.001595
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1102703 0.1560938
## sample estimates:
## mean of x
## 0.133182
### Test set diff
diff_tda_pca_5.50.5_lr.n2_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n2_test
## Accuracy
## 0.4875102
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n2_test_odds.left<-bst_tda_pca_5.50.5_lr.n2_test$probLeft/bst_tda_pca_5.50.5_lr.n2_test$probRight
bst_tda_pca_5.50.5_lr.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_lr.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1591
##
## $winRight
## [1] 0.8409
# Bayesian Correlated Test
# NOTE(review): diff_tda_pca_5.50.5_lr.n2_test holds a single test-set accuracy
# difference, and this call returns NA for left/rope/right. Presumably the test
# needs more than one observation to estimate a variance -- TODO confirm against
# the definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_lr.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n2_test)))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n2_test)) #bf_tda_pca_5.50.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n2_test))
##Node3
# Fit a binomial GLM (logistic regression) on the node-3 data via train(),
# resampled as configured in fitControl and scored by Accuracy.
Adult_TDA_PC_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n3_LrFit0
## Generalized Linear Model
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results:
##
## Accuracy Kappa
## 0.8284742 0.4265563
Adult_TDA_PC_5.50.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8305008 0.4347173 Fold1
## 2 0.8248357 0.4140545 Fold2
## 3 0.8300861 0.4308971 Fold3
# Per-fold accuracies of the node-3 model (first column of $resample).
# This must read the n3 fit: reading the n2 fit here makes the node-3 3-fold
# comparison a repeat of the node-2 comparison.
ad_tda_pc_5.50.5_n3_lr_fit_re <- Adult_TDA_PC_5.50.5_n3_LrFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.180e+13 8.650e+12 -1.364 0.172639
## V1 2.544e-03 2.296e-03 1.108 0.267922
## V2.. 1.180e+13 8.650e+12 1.364 0.172639
## V2.Federal.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Local.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Never.worked NA NA NA NA
## V2.Private 1.180e+13 8.650e+12 1.364 0.172639
## V2.Self.emp.inc 1.180e+13 8.650e+12 1.364 0.172639
## V2.Self.emp.not.inc 1.180e+13 8.650e+12 1.364 0.172639
## V2.State.gov 1.180e+13 8.650e+12 1.364 0.172639
## V2.Without.pay 1.180e+13 8.650e+12 1.364 0.172639
## V3 1.127e-06 2.361e-07 4.773 1.81e-06 ***
## V4.10th -3.082e-01 1.751e-01 -1.760 0.078340 .
## V4.11th -4.031e-01 1.777e-01 -2.269 0.023258 *
## V4.12th 1.067e-01 2.582e-01 0.413 0.679442
## V4.1st.4th -9.017e-01 4.887e-01 -1.845 0.065014 .
## V4.5th.6th -9.334e-01 3.349e-01 -2.787 0.005318 **
## V4.7th.8th -1.341e+00 2.545e-01 -5.269 1.37e-07 ***
## V4.9th -9.128e-01 2.554e-01 -3.575 0.000351 ***
## V4.Assoc.acdm -5.927e-01 1.449e-01 -4.092 4.28e-05 ***
## V4.Assoc.voc -4.086e-01 1.262e-01 -3.236 0.001210 **
## V4.Bachelors -6.358e-01 8.503e-02 -7.478 7.57e-14 ***
## V4.Doctorate 1.530e-01 2.176e-01 0.703 0.482035
## V4.HS.grad -3.751e-01 6.898e-02 -5.437 5.42e-08 ***
## V4.Masters -4.995e-01 1.200e-01 -4.163 3.14e-05 ***
## V4.Preschool -3.157e+01 6.581e+04 0.000 0.999617
## V4.Prof.school -7.407e-02 1.962e-01 -0.378 0.705726
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -5.751e-01 1.975e-01 -2.912 0.003586 **
## V6.Married.AF.spouse 1.453e+00 8.056e-01 1.804 0.071232 .
## V6.Married.civ.spouse -2.388e-01 3.406e-01 -0.701 0.483250
## V6.Married.spouse.absent -4.412e-01 3.077e-01 -1.434 0.151629
## V6.Never.married -3.763e-01 2.065e-01 -1.822 0.068456 .
## V6.Separated -3.602e-01 2.592e-01 -1.389 0.164685
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 1.219e+00 1.374e-01 8.872 < 2e-16 ***
## V7.Armed.Forces -2.519e+01 2.088e+05 0.000 0.999904
## V7.Craft.repair 1.752e-01 1.230e-01 1.425 0.154147
## V7.Exec.managerial 5.016e-01 1.296e-01 3.871 0.000108 ***
## V7.Farming.fishing -1.295e+00 2.989e-01 -4.331 1.48e-05 ***
## V7.Handlers.cleaners 4.344e-01 1.723e-01 2.522 0.011685 *
## V7.Machine.op.inspct 6.695e-01 1.368e-01 4.895 9.84e-07 ***
## V7.Other.service 3.295e-01 1.593e-01 2.069 0.038550 *
## V7.Priv.house.serv -2.618e+00 7.482e+00 -0.350 0.726442
## V7.Prof.specialty 4.234e-01 1.366e-01 3.100 0.001937 **
## V7.Protective.serv 2.434e-01 2.090e-01 1.164 0.244225
## V7.Sales 7.925e-01 1.283e-01 6.175 6.61e-10 ***
## V7.Tech.support 1.111e+00 1.648e-01 6.740 1.58e-11 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -8.944e-01 1.333e-01 -6.709 1.97e-11 ***
## V8.Not.in.family 3.686e-02 3.046e-01 0.121 0.903684
## V8.Other.relative -4.079e-01 2.831e-01 -1.441 0.149643
## V8.Own.child -4.954e-01 2.973e-01 -1.666 0.095670 .
## V8.Unmarried 1.740e-01 3.193e-01 0.545 0.585776
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 3.932e-01 2.667e-01 1.474 0.140444
## V9.Asian.Pac.Islander 6.353e-01 2.048e-01 3.101 0.001926 **
## V9.Black 9.496e-01 9.520e-02 9.975 < 2e-16 ***
## V9.Other 4.337e-01 3.313e-01 1.309 0.190520
## V9.White NA NA NA NA
## V10.Female 1.575e+00 1.125e-01 13.996 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 2.870e-04 1.397e-05 20.547 < 2e-16 ***
## V12 2.752e-04 5.333e-05 5.161 2.46e-07 ***
## V13 4.299e-03 2.299e-03 1.870 0.061477 .
## V14.. -1.261e+00 8.687e-01 -1.451 0.146647
## V14.Cambodia -2.165e-02 1.122e+00 -0.019 0.984608
## V14.Canada -1.081e+00 9.282e-01 -1.165 0.244126
## V14.China -2.365e+00 1.014e+00 -2.333 0.019646 *
## V14.Columbia -2.434e+00 1.350e+00 -1.803 0.071370 .
## V14.Cuba -2.517e-01 9.402e-01 -0.268 0.788946
## V14.Dominican.Republic -2.448e+00 1.379e+00 -1.775 0.075862 .
## V14.Ecuador -3.383e-01 1.170e+00 -0.289 0.772471
## V14.El.Salvador -1.134e+00 1.040e+00 -1.090 0.275655
## V14.England -5.701e-01 9.346e-01 -0.610 0.541868
## V14.France -8.438e-01 1.125e+00 -0.750 0.453229
## V14.Germany -3.181e-01 9.164e-01 -0.347 0.728528
## V14.Greece -2.236e+00 1.227e+00 -1.823 0.068329 .
## V14.Guatemala -9.200e-01 1.120e+00 -0.821 0.411456
## V14.Haiti -5.254e-01 1.101e+00 -0.477 0.633061
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 2.462e+01 4.015e+05 0.000 0.999951
## V14.Hong -1.924e+00 1.476e+00 -1.303 0.192448
## V14.Hungary -1.522e+00 1.448e+00 -1.051 0.293204
## V14.India -1.700e+00 9.754e-01 -1.743 0.081350 .
## V14.Iran -1.922e+00 1.135e+00 -1.693 0.090511 .
## V14.Ireland -8.795e-01 1.248e+00 -0.705 0.480868
## V14.Italy -6.023e-01 9.656e-01 -0.624 0.532777
## V14.Jamaica -5.346e-01 9.737e-01 -0.549 0.582947
## V14.Japan -8.683e-01 1.004e+00 -0.865 0.387015
## V14.Laos -2.194e+00 1.435e+00 -1.528 0.126423
## V14.Mexico -1.498e+00 8.854e-01 -1.691 0.090751 .
## V14.Nicaragua -1.320e+00 1.182e+00 -1.117 0.264098
## V14.Outlying.US.Guam.USVI.etc. -2.500e+01 1.811e+05 0.000 0.999890
## V14.Peru -1.708e+00 1.441e+00 -1.185 0.235994
## V14.Philippines -6.762e-01 9.112e-01 -0.742 0.458005
## V14.Poland -9.216e-01 9.850e-01 -0.936 0.349454
## V14.Portugal -1.939e+00 1.388e+00 -1.397 0.162338
## V14.Puerto.Rico -1.003e+00 9.656e-01 -1.038 0.299097
## V14.Scotland -7.464e-01 1.355e+00 -0.551 0.581801
## V14.South -1.846e+00 1.019e+00 -1.812 0.070008 .
## V14.Taiwan -1.104e+00 1.055e+00 -1.047 0.295222
## V14.Thailand -1.627e+00 1.500e+00 -1.085 0.277885
## V14.Trinadad.Tobago -9.675e-01 1.215e+00 -0.796 0.425941
## V14.United.States -9.622e-01 8.492e-01 -1.133 0.257193
## V14.Vietnam -1.750e+00 1.071e+00 -1.634 0.102245
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14233 on 13239 degrees of freedom
## Residual deviance: 10681 on 13142 degrees of freedom
## AIC: 10877
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 model, showing up to 50 features.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.50.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n3_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.50.5_n3_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the test-set predictions against the observed labels
ad_tda_pc_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5130 1739
## >50K 2286 613
##
## Accuracy : 0.5879
## 95% CI : (0.5781, 0.5977)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0441
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6917
## Specificity : 0.2606
## Pos Pred Value : 0.7468
## Neg Pred Value : 0.2115
## Prevalence : 0.7592
## Detection Rate : 0.5252
## Detection Prevalence : 0.7032
## Balanced Accuracy : 0.4762
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5130 1739
## >50K 2286 613
##
## Accuracy : 0.5879
## 95% CI : (0.5781, 0.5977)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0441
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6917
## Specificity : 0.2606
## Pos Pred Value : 0.7468
## Neg Pred Value : 0.2115
## Prevalence : 0.7592
## Detection Rate : 0.5252
## Detection Prevalence : 0.7032
## Balanced Accuracy : 0.4762
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.879402e-01 -4.411987e-02 5.781030e-01 5.977249e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 7.554508e-18
# Keep the overall test-set accuracy of the node-3 model (first entry of $overall).
ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n3_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.6917476 0.2606293 0.7468336
## Neg Pred Value Precision Recall
## 0.2114522 0.7468336 0.6917476
## F1 Prevalence Detection Rate
## 0.7182359 0.7592138 0.5251843
## Detection Prevalence Balanced Accuracy
## 0.7032146 0.4761884
# Keep precision, recall and F1 (entries 5 to 7 of $byClass) for the node-3 model.
ad_tda_pc_5.50.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
diff_tda_pca_5.50.5_lr_n3_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.50.5_n3_lr_fit_re)
diff_tda_pca_5.50.5_lr_n3_3_fold
## Accuracy
## 1 0.1284116
## 2 0.1273210
## 3 0.1438135
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds "left": ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_lr.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009366667
##
## $winRight
## [1] 0.9906333
# Bayesian Correlated Test
bct_tda_pca_5.50.5_lr.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n3_3_fold
## $left
## [1] 0.0009195529
##
## $rope
## [1] 0.000321639
##
## $right
## [1] 0.9987588
# Plot the ROPE summary of the per-fold accuracy differences, ROPE = [-0.01, 0.01]
plot(rope(diff_tda_pca_5.50.5_lr_n3_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
#bf_tda_pca_5.50.5_lr.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold)
## t = 25.011, df = 2, p-value = 0.001595
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1102703 0.1560938
## sample estimates:
## mean of x
## 0.133182
### Test set diff
diff_tda_pca_5.50.5_lr.n3_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n3_test
## Accuracy
## 0.2646396
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n3_test_odds.left<-bst_tda_pca_5.50.5_lr.n3_test$probLeft/bst_tda_pca_5.50.5_lr.n3_test$probRight
bst_tda_pca_5.50.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_lr.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1563
##
## $winRight
## [1] 0.8437
# Bayesian Correlated Test
# NOTE(review): diff_tda_pca_5.50.5_lr.n3_test holds a single test-set accuracy
# difference, and this call returns NA for left/rope/right. Presumably the test
# needs more than one observation to estimate a variance -- TODO confirm against
# the definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_lr.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n3_test)))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n3_test)) #bf_tda_pca_5.50.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n3_test))
##Node4
# Fit a binomial GLM (logistic regression) on the node-4 data via train(),
# resampled as configured in fitControl and scored by Accuracy.
Adult_TDA_PC_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n4_LrFit0
## Generalized Linear Model
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11134, 11133, 11133
## Resampling results:
##
## Accuracy Kappa
## 0.8176725 0.2025869
Adult_TDA_PC_5.50.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.9482573 0.23648689 Fold1
## 2 0.5523621 0.07866163 Fold2
## 3 0.9523981 0.29261226 Fold3
# Per-fold accuracies of the node-4 model, kept as a one-column data frame.
ad_tda_pc_5.50.5_n4_lr_fit_re <- Adult_TDA_PC_5.50.5_n4_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.50.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.026e+12 1.131e+13 0.445 0.656642
## V1 2.109e-02 3.735e-03 5.647 1.63e-08 ***
## V2.. -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Federal.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Local.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Never.worked -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Private -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Self.emp.inc -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Self.emp.not.inc -5.026e+12 1.131e+13 -0.445 0.656642
## V2.State.gov -5.026e+12 1.131e+13 -0.445 0.656642
## V2.Without.pay -5.026e+12 1.131e+13 -0.445 0.656642
## V3 7.685e-07 3.631e-07 2.116 0.034327 *
## V4.10th -1.539e+00 4.888e-01 -3.148 0.001642 **
## V4.11th -3.349e-01 3.036e-01 -1.103 0.270076
## V4.12th -8.699e-01 4.911e-01 -1.771 0.076505 .
## V4.1st.4th -2.287e+01 2.637e+04 -0.001 0.999308
## V4.5th.6th -9.827e-01 6.698e-01 -1.467 0.142306
## V4.7th.8th -9.913e-01 4.586e-01 -2.162 0.030647 *
## V4.9th -3.022e-01 4.128e-01 -0.732 0.464162
## V4.Assoc.acdm -1.521e-02 1.939e-01 -0.078 0.937472
## V4.Assoc.voc -2.068e-02 1.890e-01 -0.109 0.912866
## V4.Bachelors 3.228e-01 1.220e-01 2.646 0.008142 **
## V4.Doctorate 1.209e+00 3.656e-01 3.307 0.000943 ***
## V4.HS.grad -4.158e-01 1.105e-01 -3.763 0.000168 ***
## V4.Masters 4.200e-01 1.832e-01 2.292 0.021895 *
## V4.Preschool -2.166e+02 1.119e+07 0.000 0.999985
## V4.Prof.school 5.598e-01 3.690e-01 1.517 0.129205
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 1.284e-02 1.847e-01 0.070 0.944562
## V6.Married.AF.spouse 3.113e+00 7.265e-01 4.285 1.83e-05 ***
## V6.Married.civ.spouse 1.928e+00 4.073e-01 4.734 2.20e-06 ***
## V6.Married.spouse.absent 4.426e-02 3.222e-01 0.137 0.890729
## V6.Never.married -2.178e-01 2.053e-01 -1.061 0.288801
## V6.Separated -3.133e-01 2.699e-01 -1.161 0.245690
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 5.661e-02 2.573e-01 0.220 0.825873
## V7.Armed.Forces -2.308e+01 1.302e+05 0.000 0.999859
## V7.Craft.repair 1.553e-01 2.726e-01 0.570 0.568930
## V7.Exec.managerial 2.120e-01 2.630e-01 0.806 0.420201
## V7.Farming.fishing -2.666e+00 1.019e+00 -2.615 0.008920 **
## V7.Handlers.cleaners -7.790e-01 4.203e-01 -1.853 0.063838 .
## V7.Machine.op.inspct -7.407e-01 3.248e-01 -2.280 0.022592 *
## V7.Other.service -5.282e-01 2.812e-01 -1.878 0.060362 .
## V7.Priv.house.serv -4.245e+00 2.429e+00 -1.748 0.080495 .
## V7.Prof.specialty 3.726e-02 2.693e-01 0.138 0.889934
## V7.Protective.serv 7.033e-01 3.679e-01 1.912 0.055919 .
## V7.Sales 4.649e-02 2.667e-01 0.174 0.861630
## V7.Tech.support 2.779e-01 3.017e-01 0.921 0.357044
## V7.Transport.moving NA NA NA NA
## V8.Husband 4.241e+02 3.242e+05 0.001 0.998956
## V8.Not.in.family -1.275e-01 3.771e-01 -0.338 0.735328
## V8.Other.relative -1.607e+00 3.870e-01 -4.153 3.28e-05 ***
## V8.Own.child -1.321e+00 3.610e-01 -3.658 0.000254 ***
## V8.Unmarried -1.855e-01 3.874e-01 -0.479 0.632133
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 2.881e-01 3.548e-01 0.812 0.416757
## V9.Asian.Pac.Islander 4.901e-01 2.870e-01 1.707 0.087742 .
## V9.Black 1.134e-01 1.326e-01 0.855 0.392497
## V9.Other 6.152e-02 4.913e-01 0.125 0.900359
## V9.White NA NA NA NA
## V10.Female 2.783e-01 1.074e-01 2.590 0.009585 **
## V10.Male NA NA NA NA
## V11 3.618e-04 1.712e-05 21.131 < 2e-16 ***
## V12 3.504e-04 9.101e-05 3.850 0.000118 ***
## V13 2.382e-02 3.499e-03 6.809 9.82e-12 ***
## V14.. -2.180e+00 1.310e+00 -1.665 0.095997 .
## V14.Cambodia -2.357e+01 4.374e+04 -0.001 0.999570
## V14.Canada -2.264e+00 1.406e+00 -1.610 0.107299
## V14.China -2.090e+00 1.447e+00 -1.444 0.148672
## V14.Columbia -2.527e+01 5.220e+04 0.000 0.999614
## V14.Cuba -2.811e+00 1.498e+00 -1.877 0.060565 .
## V14.Dominican.Republic -2.739e+00 1.671e+00 -1.639 0.101231
## V14.Ecuador -2.487e+01 8.388e+04 0.000 0.999763
## V14.El.Salvador -2.753e+00 1.689e+00 -1.630 0.103163
## V14.England -2.255e+00 1.407e+00 -1.603 0.108979
## V14.France -2.673e+00 1.776e+00 -1.505 0.132281
## V14.Germany -2.223e+00 1.377e+00 -1.614 0.106540
## V14.Greece -1.724e+00 1.752e+00 -0.984 0.325093
## V14.Guatemala -4.375e-01 1.506e+00 -0.291 0.771422
## V14.Haiti -2.640e+00 1.704e+00 -1.549 0.121412
## V14.Holand.Netherlands -2.397e+01 3.370e+05 0.000 0.999943
## V14.Honduras -2.508e+01 1.011e+05 0.000 0.999802
## V14.Hong -2.605e+01 8.869e+04 0.000 0.999766
## V14.Hungary -1.846e+00 1.721e+00 -1.073 0.283395
## V14.India -2.515e+00 1.526e+00 -1.648 0.099286 .
## V14.Iran -2.600e+01 8.747e+04 0.000 0.999763
## V14.Ireland -1.923e+00 1.673e+00 -1.150 0.250346
## V14.Italy -1.033e+00 1.412e+00 -0.732 0.464434
## V14.Jamaica -1.851e+00 1.526e+00 -1.212 0.225324
## V14.Japan -1.023e+00 1.404e+00 -0.729 0.466262
## V14.Laos -2.286e+00 1.742e+00 -1.312 0.189437
## V14.Mexico -2.746e+00 1.369e+00 -2.006 0.044837 *
## V14.Nicaragua -1.642e+00 1.723e+00 -0.953 0.340633
## V14.Outlying.US.Guam.USVI.etc. -2.619e+01 9.667e+04 0.000 0.999784
## V14.Peru -2.527e+01 7.152e+04 0.000 0.999718
## V14.Philippines -2.303e+00 1.370e+00 -1.681 0.092735 .
## V14.Poland -2.459e+00 1.651e+00 -1.489 0.136359
## V14.Portugal -1.331e+00 1.537e+00 -0.866 0.386319
## V14.Puerto.Rico -2.014e+00 1.384e+00 -1.455 0.145592
## V14.Scotland -2.578e+00 1.821e+00 -1.416 0.156909
## V14.South -3.242e+00 1.538e+00 -2.108 0.035001 *
## V14.Taiwan -1.898e+00 1.527e+00 -1.243 0.213856
## V14.Thailand -2.681e+01 1.114e+05 0.000 0.999808
## V14.Trinadad.Tobago -2.591e+01 1.070e+05 0.000 0.999807
## V14.United.States -2.274e+00 1.281e+00 -1.776 0.075762 .
## V14.Vietnam -2.216e+00 1.562e+00 -1.419 0.155991
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7122.1 on 16699 degrees of freedom
## Residual deviance: 130876.7 on 16600 degrees of freedom
## AIC: 131077
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 model, showing up to 50 features.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.50.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n4_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_PC_5.50.5_n4_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the test-set predictions against the observed labels
ad_tda_pc_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5233 442
## >50K 2183 1910
##
## Accuracy : 0.7313
## 95% CI : (0.7224, 0.74)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4133
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.7056
## Specificity : 0.8121
## Pos Pred Value : 0.9221
## Neg Pred Value : 0.4667
## Prevalence : 0.7592
## Detection Rate : 0.5357
## Detection Prevalence : 0.5810
## Balanced Accuracy : 0.7589
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5233 442
## >50K 2183 1910
##
## Accuracy : 0.7313
## 95% CI : (0.7224, 0.74)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4133
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.7056
## Specificity : 0.8121
## Pos Pred Value : 0.9221
## Neg Pred Value : 0.4667
## Prevalence : 0.7592
## Detection Rate : 0.5357
## Detection Prevalence : 0.5810
## Balanced Accuracy : 0.7589
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.312654e-01 4.132667e-01 7.223549e-01 7.400377e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 8.301493e-253
# Keep the overall test-set accuracy of the node-4 model (first entry of $overall).
ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.7056365 0.8120748 0.9221145
## Neg Pred Value Precision Recall
## 0.4666504 0.9221145 0.7056365
## F1 Prevalence Detection Rate
## 0.7994806 0.7592138 0.5357289
## Detection Prevalence Balanced Accuracy
## 0.5809787 0.7588556
# Keep precision, recall and F1 (entries 5 to 7 of $byClass) for the node-4 model.
ad_tda_pc_5.50.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
diff_tda_pca_5.50.5_lr_n4_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.50.5_n4_lr_fit_re)
diff_tda_pca_5.50.5_lr_n4_3_fold
## Accuracy
## 1 -0.09713183
## 2 0.29707193
## 3 -0.10283238
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds "left": ratio of the sign test's probLeft to its probRight
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.50.5_lr.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left
## [1] 2
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_lr.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0.5369333
##
## $winRope
## [1] 0.0158
##
## $winRight
## [1] 0.4472667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_lr.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n4_3_fold
## $left
## [1] 0.4038195
##
## $rope
## [1] 0.0447096
##
## $right
## [1] 0.5514709
# Plot the ROPE summary of the per-fold accuracy differences, ROPE = [-0.01, 0.01]
plot(rope(diff_tda_pca_5.50.5_lr_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
#bf_tda_pca_5.50.5_lr.n4_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold)
## t = 0.24455, df = 2, p-value = 0.8296
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.5371367 0.6018751
## sample estimates:
## mean of x
## 0.03236924
### Test set diff
diff_tda_pca_5.50.5_lr.n4_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n4_test
## Accuracy
## 0.1213145
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_lr.n4_test_odds.left<-bst_tda_pca_5.50.5_lr.n4_test$probLeft/bst_tda_pca_5.50.5_lr.n4_test$probRight
bst_tda_pca_5.50.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_lr.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1547333
##
## $winRight
## [1] 0.8452667
# Bayesian Correlated Test
# NOTE(review): diff_tda_pca_5.50.5_lr.n4_test holds a single test-set accuracy
# difference, and this call returns NA for left/rope/right. Presumably the test
# needs more than one observation to estimate a variance -- TODO confirm against
# the definition of correlatedBayesianTtest.
bct_tda_pca_5.50.5_lr.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n4_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n4_test))
#bf_tda_pca_5.50.5_lr.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n4_test))
## Node 5
# Logistic regression (caret method "glm" with a binomial family) fitted on
# the node-5 data, resampled via fitControl and scored on Accuracy.
# NOTE(review): the warnings recorded for this fit report non-convergence,
# fitted probabilities of 0/1 and a rank-deficient design, so the estimated
# coefficients should not be interpreted.
Adult_TDA_PC_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit: sample/predictor counts, resampling scheme and the
# cross-validated Accuracy and Kappa.
Adult_TDA_PC_5.50.5_n5_LrFit0
## Generalized Linear Model
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9604, 9602, 9602
## Resampling results:
##
## Accuracy Kappa
## 0.9684156 0.01281264
# Per-fold Accuracy and Kappa from the cross-validation
Adult_TDA_PC_5.50.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.9975000 -0.0009383797 Fold1
## 2 0.9162849 -0.0040778877 Fold2
## 3 0.9914619 0.0434541797 Fold3
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame for the fold-wise comparisons further down.
ad_tda_pc_5.50.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.50.5_n5_LrFit0$resample["Accuracy"]
# Coefficient table of the final glm fit
summary(Adult_TDA_PC_5.50.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.162e+15 4.232e+07 -51101694 <2e-16 ***
## V1 2.376e+12 6.085e+04 39044136 <2e-16 ***
## V2.. -2.941e+15 2.417e+07 -121660805 <2e-16 ***
## V2.Federal.gov -1.699e+14 2.417e+07 -7030029 <2e-16 ***
## V2.Local.gov -2.387e+15 2.400e+07 -99452186 <2e-16 ***
## V2.Never.worked -5.359e+14 3.500e+07 -15312477 <2e-16 ***
## V2.Private -1.711e+15 2.386e+07 -71711904 <2e-16 ***
## V2.Self.emp.inc -1.060e+15 2.524e+07 -42001880 <2e-16 ***
## V2.Self.emp.not.inc -2.324e+15 2.413e+07 -96314772 <2e-16 ***
## V2.State.gov -2.249e+15 2.405e+07 -93530750 <2e-16 ***
## V2.Without.pay NA NA NA NA
## V3 1.936e+08 5.347e+00 36197956 <2e-16 ***
## V4.10th 8.384e+14 3.033e+06 276394991 <2e-16 ***
## V4.11th -2.572e+14 2.616e+06 -98302261 <2e-16 ***
## V4.12th 1.142e+15 3.992e+06 285991075 <2e-16 ***
## V4.1st.4th 4.872e+14 7.471e+06 65209586 <2e-16 ***
## V4.5th.6th 4.300e+14 5.646e+06 76171359 <2e-16 ***
## V4.7th.8th -2.477e+14 4.295e+06 -57658375 <2e-16 ***
## V4.9th 8.353e+14 4.195e+06 199139001 <2e-16 ***
## V4.Assoc.acdm 1.640e+15 3.285e+06 499144179 <2e-16 ***
## V4.Assoc.voc 1.068e+13 3.097e+06 3446569 <2e-16 ***
## V4.Bachelors 1.544e+15 2.227e+06 693399248 <2e-16 ***
## V4.Doctorate 1.841e+15 2.545e+07 72331489 <2e-16 ***
## V4.HS.grad -2.490e+14 1.468e+06 -169661737 <2e-16 ***
## V4.Masters -6.746e+14 4.802e+06 -140470411 <2e-16 ***
## V4.Preschool 5.415e+14 1.147e+07 47195269 <2e-16 ***
## V4.Prof.school 1.703e+15 1.519e+07 112129727 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 4.259e+14 2.874e+06 148178073 <2e-16 ***
## V6.Married.AF.spouse -1.543e+14 2.463e+07 -6265107 <2e-16 ***
## V6.Married.civ.spouse 1.217e+15 7.595e+06 160187500 <2e-16 ***
## V6.Married.spouse.absent 1.506e+13 4.792e+06 3142987 <2e-16 ***
## V6.Never.married 4.063e+14 3.141e+06 129339448 <2e-16 ***
## V6.Separated 3.580e+14 3.572e+06 100204898 <2e-16 ***
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -1.017e+15 3.743e+06 -271623876 <2e-16 ***
## V7.Armed.Forces -1.176e+15 3.396e+07 -34625127 <2e-16 ***
## V7.Craft.repair 6.660e+14 4.032e+06 165173011 <2e-16 ***
## V7.Exec.managerial -2.748e+14 4.233e+06 -64926351 <2e-16 ***
## V7.Farming.fishing 7.219e+14 5.332e+06 135380403 <2e-16 ***
## V7.Handlers.cleaners 7.555e+14 4.069e+06 185659303 <2e-16 ***
## V7.Machine.op.inspct 5.763e+13 4.033e+06 14288309 <2e-16 ***
## V7.Other.service 3.967e+13 3.697e+06 10731345 <2e-16 ***
## V7.Priv.house.serv -7.565e+14 6.687e+06 -113133292 <2e-16 ***
## V7.Prof.specialty -2.301e+14 4.284e+06 -53698560 <2e-16 ***
## V7.Protective.serv 5.842e+13 6.468e+06 9032610 <2e-16 ***
## V7.Sales 1.079e+14 3.869e+06 27884603 <2e-16 ***
## V7.Tech.support -1.244e+15 4.879e+06 -255023701 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband NA NA NA NA
## V8.Not.in.family 1.050e+15 7.815e+06 134312655 <2e-16 ***
## V8.Other.relative 6.155e+14 7.766e+06 79260148 <2e-16 ***
## V8.Own.child 6.824e+14 7.775e+06 87770147 <2e-16 ***
## V8.Unmarried 1.163e+15 7.891e+06 147418645 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 7.955e+14 5.009e+06 158791158 <2e-16 ***
## V9.Asian.Pac.Islander 5.606e+14 4.575e+06 122534488 <2e-16 ***
## V9.Black 5.857e+13 1.646e+06 35588559 <2e-16 ***
## V9.Other -1.631e+14 5.283e+06 -30878904 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female 1.778e+15 1.416e+06 1256171611 <2e-16 ***
## V10.Male NA NA NA NA
## V11 2.519e+10 6.411e+02 39287154 <2e-16 ***
## V12 6.275e+11 2.151e+03 291765144 <2e-16 ***
## V13 -4.315e+11 5.322e+04 -8106976 <2e-16 ***
## V14.. -1.353e+15 3.389e+07 -39929374 <2e-16 ***
## V14.Cambodia -1.630e+15 4.235e+07 -38482872 <2e-16 ***
## V14.Canada -1.194e+15 3.516e+07 -33962427 <2e-16 ***
## V14.China -3.368e+15 3.680e+07 -91523027 <2e-16 ***
## V14.Columbia -9.336e+14 3.529e+07 -26452174 <2e-16 ***
## V14.Cuba -1.658e+15 3.517e+07 -47133098 <2e-16 ***
## V14.Dominican.Republic -7.311e+14 3.493e+07 -20929588 <2e-16 ***
## V14.Ecuador -8.851e+14 3.764e+07 -23516833 <2e-16 ***
## V14.El.Salvador -1.400e+15 3.455e+07 -40520006 <2e-16 ***
## V14.England -9.049e+14 3.546e+07 -25518749 <2e-16 ***
## V14.France -1.156e+15 4.041e+07 -28616005 <2e-16 ***
## V14.Germany -1.720e+15 3.467e+07 -49613234 <2e-16 ***
## V14.Greece -1.254e+15 4.509e+07 -27813651 <2e-16 ***
## V14.Guatemala -1.687e+15 3.503e+07 -48141120 <2e-16 ***
## V14.Haiti -1.119e+15 3.557e+07 -31471154 <2e-16 ***
## V14.Holand.Netherlands -4.936e+15 7.528e+07 -65570303 <2e-16 ***
## V14.Honduras -1.554e+15 3.884e+07 -40018740 <2e-16 ***
## V14.Hong -1.433e+15 4.054e+07 -35339499 <2e-16 ***
## V14.Hungary -1.455e+14 4.509e+07 -3227333 <2e-16 ***
## V14.India -1.478e+15 3.687e+07 -40085617 <2e-16 ***
## V14.Iran -1.369e+14 4.335e+07 -3157781 <2e-16 ***
## V14.Ireland -2.012e+15 3.844e+07 -52329979 <2e-16 ***
## V14.Italy -1.160e+15 3.683e+07 -31487069 <2e-16 ***
## V14.Jamaica -8.793e+14 3.479e+07 -25275953 <2e-16 ***
## V14.Japan -1.230e+15 3.659e+07 -33630842 <2e-16 ***
## V14.Laos -1.349e+15 4.002e+07 -33707314 <2e-16 ***
## V14.Mexico -1.540e+15 3.383e+07 -45503793 <2e-16 ***
## V14.Nicaragua -4.598e+14 3.682e+07 -12488819 <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -1.919e+15 3.923e+07 -48921853 <2e-16 ***
## V14.Peru -3.046e+15 3.667e+07 -83061823 <2e-16 ***
## V14.Philippines -3.528e+15 3.460e+07 -101957990 <2e-16 ***
## V14.Poland -3.354e+15 3.630e+07 -92405163 <2e-16 ***
## V14.Portugal -1.466e+15 3.760e+07 -38980594 <2e-16 ***
## V14.Puerto.Rico -7.184e+14 3.454e+07 -20800085 <2e-16 ***
## V14.Scotland -3.348e+14 4.508e+07 -7426195 <2e-16 ***
## V14.South -1.565e+15 3.570e+07 -43840023 <2e-16 ***
## V14.Taiwan -3.826e+15 3.745e+07 -102162557 <2e-16 ***
## V14.Thailand -2.314e+15 3.946e+07 -58644892 <2e-16 ***
## V14.Trinadad.Tobago -1.090e+15 3.926e+07 -27767055 <2e-16 ***
## V14.United.States -2.224e+15 3.360e+07 -66189140 <2e-16 ***
## V14.Vietnam -1.710e+15 3.544e+07 -48267228 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 418.0 on 14403 degrees of freedom
## Residual deviance: 5550.7 on 14306 degrees of freedom
## AIC: 5746.7
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 model, showing 50 features.
# The plot title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.50.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.50.5_n5_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the node-5 model. pred0 is a shared
# scratch name that later predict() calls overwrite, so it is consumed
# immediately by the confusion matrix that follows.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix: test-set predictions against the true labels
ad_tda_pc_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7398 2338
## >50K 18 14
##
## Accuracy : 0.7588
## 95% CI : (0.7502, 0.7673)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5432
##
## Kappa : 0.0053
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.997573
## Specificity : 0.005952
## Pos Pred Value : 0.759860
## Neg Pred Value : 0.437500
## Prevalence : 0.759214
## Detection Rate : 0.757371
## Detection Prevalence : 0.996724
## Balanced Accuracy : 0.501763
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; it was already
# printed right after it was created, so this output is a duplicate.
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7398 2338
## >50K 18 14
##
## Accuracy : 0.7588
## 95% CI : (0.7502, 0.7673)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5432
##
## Kappa : 0.0053
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.997573
## Specificity : 0.005952
## Pos Pred Value : 0.759860
## Neg Pred Value : 0.437500
## Prevalence : 0.759214
## Detection Rate : 0.757371
## Detection Prevalence : 0.996724
## Balanced Accuracy : 0.501763
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics: Accuracy, Kappa, accuracy bounds, null
# accuracy and the two p-values.
ad_tda_pc_5.50.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.758804259 0.005315264 0.750192757 0.767261189 0.759213759
## AccuracyPValue McnemarPValue
## 0.543204853 0.000000000
# Test-set accuracy (first element of $overall), kept as a named scalar
ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.50.5_n5_lr_cf0$overall["Accuracy"]
# Per-class statistics for the positive class
ad_tda_pc_5.50.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.997572816 0.005952381 0.759860312
## Neg Pred Value Precision Recall
## 0.437500000 0.759860312 0.997572816
## F1 Prevalence Detection Rate
## 0.862639925 0.759213759 0.757371007
## Detection Prevalence Balanced Accuracy
## 0.996723997 0.501762598
# Precision, Recall and F1 (elements 5 to 7 of $byClass)
ad_tda_pc_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.50.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (node 5)
### Per-fold (3-fold CV) accuracy difference
# NOTE(review): the node-5 model was resampled on its own 14404-row data set,
# so its folds are presumably not paired with those behind ad_lr_fit_re --
# confirm that a fold-wise difference is meaningful here.
diff_tda_pca_5.50.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.50.5_n5_lr_fit_re
diff_tda_pca_5.50.5_lr_n5_3_fold
## Accuracy
## 1 -0.14637456
## 2 -0.06685082
## 3 -0.14189622
## Bayesian tests on the per-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits;
# the result exposes probLeft / probRope / probRight).
bst_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# There is no guard on the denominator: a probRight of 0 yields Inf.
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n5_3_fold[["probRight"]]
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the per-fold differences, bounds -0.01 / 0.01
# (the result exposes winLeft / winRope / winRight).
bsr_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9919333
##
## $winRope
## [1] 0.008066667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the per-fold differences; the result exposes
# $left / $rope / $right. The 0.1 is presumably the fold-correlation
# parameter -- confirm against the definition of correlatedBayesianTtest().
bct_tda_pca_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9660373
##
## $rope
## [1] 0.009042925
##
## $right
## [1] 0.02491979
# ROPE plot for the node-5 per-fold differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.50.5_lr_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
#bf_tda_pca_5.50.5_lr.n5_3_fold
# Frequentist one-sample t-test of the per-fold differences against zero
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold)
## t = -4.5892, df = 2, p-value = 0.04435
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2293562 -0.0073915
## sample estimates:
## mean of x
## -0.1183739
### Test-set difference: lr_cf_ov_acc (presumably the non-TDA LR test
### accuracy) minus the node-5 TDA-assisted LR test accuracy
diff_tda_pca_5.50.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n5_test
## Accuracy
## 0.09377559
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is a single accuracy difference and the
# bounds -0.01 / 0.01 are presumably the ROPE limits.
bst_tda_pca_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.50.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; no guard against a zero denominator).
bst_tda_pca_5.50.5_lr.n5_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_test[["probLeft"]] /
  bst_tda_pca_5.50.5_lr.n5_test[["probRight"]]
bst_tda_pca_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference, bounds -0.01 / 0.01
# (the result exposes winLeft / winRope / winRight).
bsr_tda_pca_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1584333
##
## $winRight
## [1] 0.8415667
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): the input is a single value and the recorded result is NA for
# left / rope / right -- presumably because a variance cannot be estimated
# from one observation; confirm and consider dropping this call.
bct_tda_pca_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.50.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n5_test))
#BayesFactor
#bf_tda_pca_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n5_test))
#bf_tda_pca_5.50.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n5_test))
## TDA with KDE filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# Logistic regression (caret method "glm" with a binomial family) fitted on
# the KDE node-1 data, resampled via fitControl and scored on Accuracy.
# NOTE(review): the warnings recorded for this fit report non-convergence and
# a rank-deficient design; treat the coefficient table with caution.
Adult_TDA_KDE_5.50.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit: sample/predictor counts, resampling scheme and the
# cross-validated Accuracy and Kappa.
Adult_TDA_KDE_5.50.5_n1_LrFit0
## Generalized Linear Model
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8924, 8924, 8926
## Resampling results:
##
## Accuracy Kappa
## 0.8591161 0.6172397
# Per-fold Accuracy and Kappa from the cross-validation
Adult_TDA_KDE_5.50.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8579431 0.6154375 Fold1
## 2 0.8664575 0.6335638 Fold2
## 3 0.8529478 0.6027178 Fold3
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame for the fold-wise comparisons further down.
ad_tda_kde_5.50.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n1_LrFit0$resample["Accuracy"]
# Coefficient table of the final glm fit
summary(Adult_TDA_KDE_5.50.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.158e+13 1.510e+13 -1.429 0.153005
## V1 1.441e-02 2.451e-03 5.878 4.15e-09 ***
## V2.. 2.158e+13 1.510e+13 1.429 0.153005
## V2.Federal.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Local.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Never.worked 2.158e+13 1.510e+13 1.429 0.153005
## V2.Private 2.158e+13 1.510e+13 1.429 0.153005
## V2.Self.emp.inc 2.158e+13 1.510e+13 1.429 0.153005
## V2.Self.emp.not.inc 2.158e+13 1.510e+13 1.429 0.153005
## V2.State.gov 2.158e+13 1.510e+13 1.429 0.153005
## V2.Without.pay 2.158e+13 1.510e+13 1.429 0.153005
## V3 6.893e-07 2.127e-07 3.241 0.001190 **
## V4.10th -1.169e+00 1.707e-01 -6.848 7.49e-12 ***
## V4.11th -1.106e+00 1.708e-01 -6.476 9.41e-11 ***
## V4.12th -8.562e-01 3.188e-01 -2.686 0.007236 **
## V4.1st.4th -1.741e+00 4.843e-01 -3.595 0.000325 ***
## V4.5th.6th -1.482e+00 3.069e-01 -4.828 1.38e-06 ***
## V4.7th.8th -1.618e+00 1.957e-01 -8.268 < 2e-16 ***
## V4.9th -1.325e+00 2.285e-01 -5.797 6.76e-09 ***
## V4.Assoc.acdm 1.387e-01 1.601e-01 0.866 0.386287
## V4.Assoc.voc 1.032e-01 1.585e-01 0.651 0.515109
## V4.Bachelors 5.937e-01 9.984e-02 5.947 2.74e-09 ***
## V4.Doctorate 1.835e+00 1.768e-01 10.379 < 2e-16 ***
## V4.HS.grad -4.623e-01 9.987e-02 -4.629 3.67e-06 ***
## V4.Masters 1.019e+00 1.205e-01 8.462 < 2e-16 ***
## V4.Preschool -3.217e+01 6.329e+04 -0.001 0.999594
## V4.Prof.school 1.589e+00 1.577e-01 10.082 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.860e-01 1.855e-01 -1.542 0.122995
## V6.Married.AF.spouse 2.264e+00 8.805e-01 2.571 0.010128 *
## V6.Married.civ.spouse 1.810e+00 4.553e-01 3.976 7.01e-05 ***
## V6.Married.spouse.absent -5.586e-01 3.806e-01 -1.468 0.142147
## V6.Never.married -8.128e-01 1.962e-01 -4.143 3.43e-05 ***
## V6.Separated -3.280e-01 2.735e-01 -1.199 0.230452
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -2.085e-01 1.650e-01 -1.263 0.206432
## V7.Armed.Forces -1.677e-01 2.205e+00 -0.076 0.939391
## V7.Craft.repair 7.394e-02 1.413e-01 0.523 0.600816
## V7.Exec.managerial 7.304e-01 1.424e-01 5.130 2.90e-07 ***
## V7.Farming.fishing -1.036e+00 2.129e-01 -4.866 1.14e-06 ***
## V7.Handlers.cleaners -8.105e-01 2.508e-01 -3.232 0.001231 **
## V7.Machine.op.inspct -6.159e-01 1.917e-01 -3.213 0.001312 **
## V7.Other.service -1.049e+00 2.076e-01 -5.053 4.35e-07 ***
## V7.Priv.house.serv -2.332e+01 2.443e+04 -0.001 0.999238
## V7.Prof.specialty 3.653e-01 1.491e-01 2.450 0.014300 *
## V7.Protective.serv 1.828e-01 2.277e-01 0.803 0.422010
## V7.Sales 1.044e-01 1.498e-01 0.697 0.485734
## V7.Tech.support 5.809e-01 2.033e-01 2.857 0.004278 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.270e+00 1.565e-01 -8.118 4.75e-16 ***
## V8.Not.in.family -6.745e-01 4.429e-01 -1.523 0.127787
## V8.Other.relative -1.488e+00 4.241e-01 -3.509 0.000450 ***
## V8.Own.child -1.820e+00 4.592e-01 -3.964 7.36e-05 ***
## V8.Unmarried -6.297e-01 4.557e-01 -1.382 0.166968
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.743e-01 3.185e-01 -0.547 0.584288
## V9.Asian.Pac.Islander 1.859e-01 2.667e-01 0.697 0.485786
## V9.Black -9.233e-02 1.164e-01 -0.793 0.427751
## V9.Other 4.619e-01 3.857e-01 1.198 0.231052
## V9.White NA NA NA NA
## V10.Female -8.920e-01 1.133e-01 -7.872 3.49e-15 ***
## V10.Male NA NA NA NA
## V11 3.005e-04 1.528e-05 19.668 < 2e-16 ***
## V12 6.511e-04 5.583e-05 11.662 < 2e-16 ***
## V13 3.122e-02 2.322e-03 13.445 < 2e-16 ***
## V14.. -1.280e+00 1.212e+00 -1.056 0.290857
## V14.Cambodia 3.755e-01 1.680e+00 0.223 0.823174
## V14.Canada -9.345e-01 1.246e+00 -0.750 0.453395
## V14.China -2.114e+00 1.306e+00 -1.619 0.105527
## V14.Columbia -2.620e+00 1.497e+00 -1.749 0.080212 .
## V14.Cuba -1.492e+00 1.276e+00 -1.169 0.242393
## V14.Dominican.Republic -2.395e+01 4.126e+04 -0.001 0.999537
## V14.Ecuador -2.571e+00 1.971e+00 -1.305 0.191940
## V14.El.Salvador -1.926e+00 1.451e+00 -1.327 0.184408
## V14.England -1.037e+00 1.265e+00 -0.819 0.412585
## V14.France -9.263e-01 1.358e+00 -0.682 0.495292
## V14.Germany -6.693e-01 1.267e+00 -0.528 0.597199
## V14.Greece -1.402e+00 1.463e+00 -0.958 0.338079
## V14.Guatemala -1.357e+00 1.556e+00 -0.872 0.383148
## V14.Haiti -2.065e+00 1.898e+00 -1.088 0.276500
## V14.Holand.Netherlands -2.328e+01 3.075e+05 0.000 0.999940
## V14.Honduras -1.547e+00 3.232e+00 -0.479 0.632153
## V14.Hong -6.725e-01 1.451e+00 -0.463 0.643089
## V14.Hungary -5.827e-01 1.639e+00 -0.355 0.722223
## V14.India -1.689e+00 1.283e+00 -1.317 0.187963
## V14.Iran -1.879e+00 1.447e+00 -1.298 0.194142
## V14.Ireland -5.711e-01 1.894e+00 -0.302 0.762982
## V14.Italy 5.082e-01 1.255e+00 0.405 0.685609
## V14.Jamaica -2.496e+00 1.621e+00 -1.540 0.123581
## V14.Japan -1.754e-01 1.370e+00 -0.128 0.898101
## V14.Laos -7.329e-01 1.570e+00 -0.467 0.640724
## V14.Mexico -1.573e+00 1.219e+00 -1.291 0.196677
## V14.Nicaragua -1.716e+00 1.449e+00 -1.184 0.236327
## V14.Outlying.US.Guam.USVI.etc. -2.564e+01 1.160e+05 0.000 0.999824
## V14.Peru -1.265e+00 1.641e+00 -0.771 0.440959
## V14.Philippines -1.387e+00 1.263e+00 -1.098 0.272027
## V14.Poland -7.809e-01 1.367e+00 -0.571 0.567687
## V14.Portugal -2.234e+00 1.721e+00 -1.298 0.194220
## V14.Puerto.Rico -4.165e-01 1.305e+00 -0.319 0.749604
## V14.Scotland 2.571e-01 1.775e+00 0.145 0.884829
## V14.South -2.715e+00 1.388e+00 -1.956 0.050460 .
## V14.Taiwan -5.682e-01 1.398e+00 -0.406 0.684404
## V14.Thailand -1.812e+00 1.906e+00 -0.951 0.341798
## V14.Trinadad.Tobago -1.127e+00 1.723e+00 -0.654 0.512878
## V14.United.States -9.105e-01 1.193e+00 -0.763 0.445470
## V14.Vietnam -1.315e+00 1.463e+00 -0.899 0.368657
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 15320.8 on 13386 degrees of freedom
## Residual deviance: 8281.5 on 13287 degrees of freedom
## AIC: 8481.5
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the KDE node-1 model, showing 50 features.
# The plot title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.50.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n1_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the KDE node-1 model. pred0 is a shared
# scratch name that other predict() calls overwrite, so it is consumed
# immediately by the confusion matrix that follows.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix: test-set predictions against the true labels
ad_tda_kde_5.50.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6966 980
## >50K 450 1372
##
## Accuracy : 0.8536
## 95% CI : (0.8464, 0.8606)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5662
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9393
## Specificity : 0.5833
## Pos Pred Value : 0.8767
## Neg Pred Value : 0.7530
## Prevalence : 0.7592
## Detection Rate : 0.7131
## Detection Prevalence : 0.8135
## Balanced Accuracy : 0.7613
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; it was already
# printed right after it was created, so this output is a duplicate.
ad_tda_kde_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6966 980
## >50K 450 1372
##
## Accuracy : 0.8536
## 95% CI : (0.8464, 0.8606)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5662
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9393
## Specificity : 0.5833
## Pos Pred Value : 0.8767
## Neg Pred Value : 0.7530
## Prevalence : 0.7592
## Detection Rate : 0.7131
## Detection Prevalence : 0.8135
## Balanced Accuracy : 0.7613
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics: Accuracy, Kappa, accuracy bounds, null
# accuracy and the two p-values.
ad_tda_kde_5.50.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.536036e-01 5.662164e-01 8.464367e-01 8.605589e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.204810e-117 1.818713e-44
# Test-set accuracy (first element of $overall), kept as a named scalar
ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics for the positive class
ad_tda_kde_5.50.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9393204 0.5833333 0.8766675
## Neg Pred Value Precision Recall
## 0.7530187 0.8766675 0.9393204
## F1 Prevalence Detection Rate
## 0.9069132 0.7592138 0.7131450
## Detection Prevalence Balanced Accuracy
## 0.8134726 0.7613269
# Precision, Recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.50.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### Per-fold (3-fold CV) accuracy difference, KDE node 1
# NOTE(review): the KDE node-1 model was resampled on its own 13387-row data
# set, so its folds are presumably not paired with those behind
# ad_lr_fit_re -- confirm that a fold-wise difference is meaningful here.
diff_tda_kde_5.50.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n1_lr_fit_re
diff_tda_kde_5.50.5_lr_n1_3_fold
## Accuracy
## 1 -0.006817643
## 2 -0.017023478
## 3 -0.003382094
## Bayesian tests on the per-fold differences
# Bayesian sign test with bounds -0.01 / 0.01 (presumably the ROPE limits;
# the result exposes probLeft / probRope / probRight).
bst_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# There is no guard on the denominator: a probRight of 0 yields Inf.
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n1_3_fold[["probRight"]]
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the per-fold differences, bounds -0.01 / 0.01
# (the result exposes winLeft / winRope / winRight).
bsr_tda_kde_5.50.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.3270667
##
## $winRope
## [1] 0.6729333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the per-fold differences; the result exposes
# $left / $rope / $right. The 0.1 is presumably the fold-correlation
# parameter -- confirm against the definition of correlatedBayesianTtest().
bct_tda_kde_5.50.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n1_3_fold
## $left
## [1] 0.4314692
##
## $rope
## [1] 0.5403568
##
## $right
## [1] 0.02817394
# ROPE plot for the KDE node-1 per-fold differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.50.5_lr_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
#bf_tda_kde_5.50.5_lr.n1_3_fold
# Frequentist one-sample t-test of the per-fold differences against zero
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold)
## t = -2.2152, df = 2, p-value = 0.1571
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.026699808 0.008550997
## sample estimates:
## mean of x
## -0.009074405
### Test set diff
# The baseline is lr_cf_ov_acc, the same baseline the PCA node-4 and node-5
# LR test-set differences subtract from. This line previously subtracted from
# svm_cf_ov_acc, a copy-paste slip that compared an SVM baseline against this
# logistic-regression model.
# NOTE(review): the recorded outputs that follow (-0.02579853 and the
# Bayesian test results derived from it) were produced with the SVM baseline
# and must be regenerated by re-knitting.
diff_tda_kde_5.50.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n1_test
## Accuracy
## -0.02579853
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is a single accuracy difference and the
# bounds -0.01 / 0.01 are presumably the ROPE limits.
bst_tda_kde_5.50.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_lr.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# There is no guard on the denominator: a probRight of 0 yields Inf.
bst_tda_kde_5.50.5_lr.n1_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n1_test[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n1_test[["probRight"]]
bst_tda_kde_5.50.5_lr.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference, bounds -0.01 / 0.01
# (the result exposes winLeft / winRope / winRight).
bsr_tda_kde_5.50.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_lr.n1_test
## $winLeft
## [1] 0.8412667
##
## $winRope
## [1] 0.1587333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): the input is a single value and the recorded result is NA for
# left / rope / right -- presumably because a variance cannot be estimated
# from one observation; confirm and consider dropping this call.
bct_tda_kde_5.50.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n1_test))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n1_test))
#bf_tda_kde_5.50.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a binomial GLM (logistic regression) on the Node2 training subset, using
# the resampling scheme in fitControl and Accuracy as the selection metric.
# NOTE(review): the recorded run warns that glm.fit did not converge and the
# summary reports coefficients undefined because of singularities; the
# predictors look like a full one-hot encoding (e.g. both V10.Female and
# V10.Male), which is collinear with the intercept -- consider dropping a
# reference level per factor. Confirm against how the data frame is built.
Adult_TDA_KDE_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampled Node2 fit.
Adult_TDA_KDE_5.50.5_n2_LrFit0
## Generalized Linear Model
##
## 12638 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8425, 8425, 8426
## Resampling results:
##
## Accuracy Kappa
## 0.8424592 0.5950167
# Per-fold accuracy and kappa.
Adult_TDA_KDE_5.50.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8438168 0.5961240 Fold1
## 2 0.8414432 0.5937312 Fold2
## 3 0.8421178 0.5951950 Fold3
# Keep the per-fold accuracies (the first column of the resample table) as a
# one-column data frame for the fold-wise comparison.
ad_tda_kde_5.50.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n2_LrFit0$resample["Accuracy"]
# Coefficient table of the final model.
summary(Adult_TDA_KDE_5.50.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (15 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.377e+12 5.353e+12 0.444 0.656994
## V1 4.019e-02 2.998e-03 13.407 < 2e-16 ***
## V2.. -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Federal.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Local.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Never.worked -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Private -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Self.emp.inc -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Self.emp.not.inc -2.377e+12 5.353e+12 -0.444 0.656994
## V2.State.gov -2.377e+12 5.353e+12 -0.444 0.656994
## V2.Without.pay -2.377e+12 5.353e+12 -0.444 0.656994
## V3 1.103e-06 3.002e-07 3.673 0.000240 ***
## V4.10th -1.293e+00 6.071e-01 -2.129 0.033222 *
## V4.11th -7.382e-01 2.070e-01 -3.566 0.000363 ***
## V4.12th -6.992e-01 3.145e-01 -2.223 0.026183 *
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 2.497e-01 1.403e-01 1.779 0.075165 .
## V4.Assoc.voc 1.348e-01 1.431e-01 0.942 0.346329
## V4.Bachelors 9.199e-01 9.162e-02 10.040 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -4.506e-01 8.894e-02 -5.067 4.05e-07 ***
## V4.Masters 1.352e+00 1.141e-01 11.857 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 2.484e+00 2.158e-01 11.509 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -4.044e-02 2.329e-01 -0.174 0.862155
## V6.Married.AF.spouse 3.066e+00 9.421e-01 3.254 0.001137 **
## V6.Married.civ.spouse 2.088e+00 5.310e-01 3.932 8.43e-05 ***
## V6.Married.spouse.absent 2.967e-02 3.779e-01 0.079 0.937413
## V6.Never.married -5.803e-01 2.445e-01 -2.374 0.017602 *
## V6.Separated -2.802e-01 3.178e-01 -0.882 0.377909
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.207e-02 1.627e-01 0.136 0.892080
## V7.Armed.Forces -2.415e+01 3.607e+05 0.000 0.999947
## V7.Craft.repair 1.250e-01 1.443e-01 0.867 0.386187
## V7.Exec.managerial 8.952e-01 1.436e-01 6.236 4.49e-10 ***
## V7.Farming.fishing -6.954e-01 2.187e-01 -3.180 0.001473 **
## V7.Handlers.cleaners -4.394e-01 2.579e-01 -1.704 0.088433 .
## V7.Machine.op.inspct -4.405e-01 1.941e-01 -2.270 0.023203 *
## V7.Other.service -1.026e+00 2.122e-01 -4.836 1.32e-06 ***
## V7.Priv.house.serv -3.433e+00 2.526e+00 -1.359 0.174160
## V7.Prof.specialty 5.321e-01 1.509e-01 3.527 0.000420 ***
## V7.Protective.serv 7.123e-01 2.190e-01 3.253 0.001142 **
## V7.Sales 2.879e-01 1.495e-01 1.926 0.054125 .
## V7.Tech.support 5.911e-01 1.928e-01 3.066 0.002173 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.440e+00 1.490e-01 -9.669 < 2e-16 ***
## V8.Not.in.family -8.639e-01 4.964e-01 -1.740 0.081799 .
## V8.Other.relative -1.702e+00 4.234e-01 -4.021 5.81e-05 ***
## V8.Own.child -1.897e+00 5.021e-01 -3.777 0.000159 ***
## V8.Unmarried -9.347e-01 5.071e-01 -1.843 0.065260 .
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -5.421e-03 3.121e-01 -0.017 0.986139
## V9.Asian.Pac.Islander -1.005e-01 2.368e-01 -0.425 0.671122
## V9.Black 2.333e-03 1.215e-01 0.019 0.984688
## V9.Other 1.334e-01 4.064e-01 0.328 0.742657
## V9.White NA NA NA NA
## V10.Female -8.916e-01 1.120e-01 -7.960 1.72e-15 ***
## V10.Male NA NA NA NA
## V11 3.295e-04 1.625e-05 20.279 < 2e-16 ***
## V12 7.852e-04 6.169e-05 12.727 < 2e-16 ***
## V13 2.469e-02 2.664e-03 9.267 < 2e-16 ***
## V14.. -4.861e-01 9.129e-01 -0.532 0.594403
## V14.Cambodia 1.803e+00 1.339e+00 1.346 0.178193
## V14.Canada -5.672e-02 9.926e-01 -0.057 0.954429
## V14.China -6.112e-01 1.074e+00 -0.569 0.569260
## V14.Columbia -2.500e+01 6.303e+04 0.000 0.999684
## V14.Cuba 4.934e-02 1.016e+00 0.049 0.961261
## V14.Dominican.Republic -2.431e+01 6.589e+04 0.000 0.999706
## V14.Ecuador -1.085e+00 1.642e+00 -0.661 0.508921
## V14.El.Salvador -2.494e-01 1.173e+00 -0.213 0.831555
## V14.England 5.221e-01 9.807e-01 0.532 0.594445
## V14.France 6.892e-01 1.203e+00 0.573 0.566757
## V14.Germany 7.369e-01 9.650e-01 0.764 0.445097
## V14.Greece -1.341e+00 1.203e+00 -1.115 0.264954
## V14.Guatemala -7.413e-01 2.216e+00 -0.334 0.738034
## V14.Haiti -5.211e-01 1.222e+00 -0.427 0.669732
## V14.Holand.Netherlands -2.230e+01 3.364e+05 0.000 0.999947
## V14.Honduras -2.309e+01 1.684e+05 0.000 0.999891
## V14.Hong 1.743e+00 1.363e+00 1.278 0.201085
## V14.Hungary 9.291e-01 1.599e+00 0.581 0.561302
## V14.India -3.241e-01 9.738e-01 -0.333 0.739287
## V14.Iran -9.642e-02 1.249e+00 -0.077 0.938485
## V14.Ireland -2.406e+01 1.255e+05 0.000 0.999847
## V14.Italy 9.493e-01 1.030e+00 0.921 0.356945
## V14.Jamaica -1.946e+00 1.368e+00 -1.423 0.154862
## V14.Japan 4.816e-01 1.096e+00 0.440 0.660258
## V14.Laos -2.472e+01 1.183e+05 0.000 0.999833
## V14.Mexico -6.107e-01 9.836e-01 -0.621 0.534691
## V14.Nicaragua -2.426e+01 8.981e+04 0.000 0.999785
## V14.Outlying.US.Guam.USVI.etc. -2.428e+01 1.889e+05 0.000 0.999897
## V14.Peru -3.972e-01 1.436e+00 -0.277 0.782121
## V14.Philippines 6.763e-01 9.607e-01 0.704 0.481468
## V14.Poland 3.535e-01 1.087e+00 0.325 0.745094
## V14.Portugal -2.412e+01 1.266e+05 0.000 0.999848
## V14.Puerto.Rico -3.442e-01 1.140e+00 -0.302 0.762585
## V14.Scotland 8.816e-03 1.404e+00 0.006 0.994991
## V14.South -1.041e+00 1.069e+00 -0.974 0.330195
## V14.Taiwan 1.561e-01 1.085e+00 0.144 0.885539
## V14.Thailand -8.911e-01 1.498e+00 -0.595 0.551914
## V14.Trinadad.Tobago -3.699e-01 1.763e+00 -0.210 0.833792
## V14.United.States 1.391e-01 8.881e-01 0.157 0.875548
## V14.Vietnam -1.970e+00 1.455e+00 -1.354 0.175700
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14967.3 on 12637 degrees of freedom
## Residual deviance: 8297.9 on 12544 degrees of freedom
## AIC: 8485.9
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the Node2 fit, showing up to 50 features.
# The plot title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.50.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n2_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the Node2 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the test-set labels.
ad_tda_kde_5.50.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6907 966
## >50K 509 1386
##
## Accuracy : 0.849
## 95% CI : (0.8417, 0.856)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5576
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9314
## Specificity : 0.5893
## Pos Pred Value : 0.8773
## Neg Pred Value : 0.7314
## Prevalence : 0.7592
## Detection Rate : 0.7071
## Detection Prevalence : 0.8060
## Balanced Accuracy : 0.7603
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time (no new information).
ad_tda_kde_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6907 966
## >50K 509 1386
##
## Accuracy : 0.849
## 95% CI : (0.8417, 0.856)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5576
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9314
## Specificity : 0.5893
## Pos Pred Value : 0.8773
## Neg Pred Value : 0.7314
## Prevalence : 0.7592
## Detection Rate : 0.7071
## Detection Prevalence : 0.8060
## Balanced Accuracy : 0.7603
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node2 confusion matrix.
ad_tda_kde_5.50.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.489967e-01 5.576442e-01 8.417411e-01 8.560436e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.652237e-106 1.630512e-32
# Keep the overall accuracy (the first element, named "Accuracy").
ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.50.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9313646 0.5892857 0.8773022
## Neg Pred Value Precision Recall
## 0.7313984 0.8773022 0.9313646
## F1 Prevalence Detection Rate
## 0.9035254 0.7592138 0.7071048
## Detection Prevalence Balanced Accuracy
## 0.8059992 0.7603252
# Keep precision, recall and F1 (elements 5 to 7 of byClass).
ad_tda_kde_5.50.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of the baseline LR vs. the Node2 TDA-assisted LR classifier
### 3-fold diff: per-fold accuracy of ad_lr_fit_re minus the Node2 model's
### (ad_lr_fit_re is presumably the non-TDA logistic regression -- confirm).
diff_tda_kde_5.50.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n2_lr_fit_re
diff_tda_kde_5.50.5_lr_n2_3_fold
## Accuracy
## 1 0.007308687
## 2 0.007990909
## 3 0.007447916
## Bayesian tests on the Node2 fold-wise differences
# Bayesian sign test with the bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" against "right"; with both probabilities equal to 0, as in
# the recorded run, the ratio 0 / 0 is NaN.
bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n2_3_fold[["probRight"]]
bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the same differences and bounds.
bsr_tda_kde_5.50.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the Node2 fold-wise differences.
# NOTE(review): if the second argument is the between-fold correlation, 0.1
# corresponds to 10-fold CV, whereas this model is resampled with 3 folds
# (which would suggest 1/3) -- confirm against the function definition.
bct_tda_kde_5.50.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n2_3_fold
## $left
## [1] 9.338029e-05
##
## $rope
## [1] 0.9950378
##
## $right
## [1] 0.004868857
# ROPE plot of the Node2 fold-wise differences for the range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_lr_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_kde_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
# bf_tda_kde_5.50.5_lr.n2_3_fold
# One-sample t-test of the mean fold-wise difference against zero.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold)
## t = 36.433, df = 2, p-value = 0.0007525
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.006687034 0.008477974
## sample estimates:
## mean of x
## 0.007582504
### Test-set difference: baseline accuracy minus the Node2 model's accuracy.
# NOTE(review): the baseline is svm_cf_ov_acc, while the fold-wise comparison
# for this node subtracts from the LR resamples (ad_lr_fit_re) -- confirm that
# an SVM baseline is intended for an LR model.
diff_tda_kde_5.50.5_lr.n2_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n2_test
## Accuracy
## -0.02119165
## Bayesian tests on the Node2 test-set difference (a single value)
# Bayesian sign test with the bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test; the ratio is Inf whenever
# probRight is 0, as in the recorded run.
bst_tda_kde_5.50.5_lr.n2_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n2_test[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n2_test[["probRight"]]
bst_tda_kde_5.50.5_lr.n2_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the same difference and bounds.
bsr_tda_kde_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n2_test
## $winLeft
## [1] 0.8423333
##
## $winRope
## [1] 0.1576667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the Node2 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right, presumably
# because a single difference has no sample variance -- confirm.
bct_tda_kde_5.50.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n2_test)))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n2_test)) #bf_tda_pca_5.50.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n2_test))
##Node3
# Fit a binomial GLM (logistic regression) on the Node3 training subset, using
# the resampling scheme in fitControl and Accuracy as the selection metric.
# NOTE(review): the recorded run warns that glm.fit did not converge and the
# summary reports coefficients undefined because of singularities; the
# predictors look like a full one-hot encoding, which is collinear with the
# intercept -- confirm against how the data frame is built.
Adult_TDA_KDE_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the resampled Node3 fit.
Adult_TDA_KDE_5.50.5_n3_LrFit0
## Generalized Linear Model
##
## 11634 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7757, 7756, 7755
## Resampling results:
##
## Accuracy Kappa
## 0.8343636 0.5730016
# Per-fold accuracy and kappa.
Adult_TDA_KDE_5.50.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8279598 0.5554974 Fold1
## 2 0.8334193 0.5727216 Fold2
## 3 0.8417118 0.5907857 Fold3
# Keep the per-fold accuracies of the Node3 fit (first column of the resample
# table). This must read from the n3 model: taking them from the n2 model
# makes every Node3 fold-wise comparison a copy of the Node2 one. Recorded
# Node3 3-fold outputs later in this document need to be regenerated.
ad_tda_kde_5.50.5_n3_lr_fit_re <- Adult_TDA_KDE_5.50.5_n3_LrFit0$resample[1]
# Coefficient table of the final model.
summary(Adult_TDA_KDE_5.50.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (18 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.210e+13 1.132e+13 -1.068 0.285486
## V1 5.716e-02 3.707e-03 15.419 < 2e-16 ***
## V2.. 1.210e+13 1.132e+13 1.068 0.285486
## V2.Federal.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Local.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Never.worked -4.492e+15 1.132e+13 -396.606 < 2e-16 ***
## V2.Private 1.210e+13 1.132e+13 1.068 0.285486
## V2.Self.emp.inc 1.210e+13 1.132e+13 1.068 0.285486
## V2.Self.emp.not.inc 1.210e+13 1.132e+13 1.068 0.285486
## V2.State.gov 1.210e+13 1.132e+13 1.068 0.285486
## V2.Without.pay 1.210e+13 1.132e+13 1.068 0.285486
## V3 1.058e-06 4.265e-07 2.480 0.013122 *
## V4.10th NA NA NA NA
## V4.11th -2.141e+01 1.391e+05 0.000 0.999877
## V4.12th -3.019e-01 3.262e-01 -0.926 0.354645
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 4.043e-01 1.394e-01 2.901 0.003723 **
## V4.Assoc.voc 3.090e-01 1.336e-01 2.313 0.020710 *
## V4.Bachelors 1.133e+00 9.265e-02 12.228 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -2.567e-01 8.674e-02 -2.959 0.003087 **
## V4.Masters 1.542e+00 1.396e-01 11.048 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.761e-02 3.077e-01 -0.090 0.928514
## V6.Married.AF.spouse 3.429e+00 1.082e+00 3.170 0.001525 **
## V6.Married.civ.spouse 2.257e+00 5.151e-01 4.382 1.18e-05 ***
## V6.Married.spouse.absent 1.001e-01 4.357e-01 0.230 0.818365
## V6.Never.married -3.595e-01 3.181e-01 -1.130 0.258384
## V6.Separated -2.111e-01 3.891e-01 -0.543 0.587345
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.406e-01 1.626e-01 1.480 0.139002
## V7.Armed.Forces -2.416e+01 1.729e+05 0.000 0.999888
## V7.Craft.repair 1.624e-01 1.439e-01 1.129 0.258979
## V7.Exec.managerial 1.016e+00 1.458e-01 6.970 3.17e-12 ***
## V7.Farming.fishing -8.835e-01 2.453e-01 -3.602 0.000316 ***
## V7.Handlers.cleaners -4.339e-01 2.492e-01 -1.741 0.081600 .
## V7.Machine.op.inspct -5.824e-02 1.761e-01 -0.331 0.740869
## V7.Other.service -6.660e-01 2.097e-01 -3.176 0.001495 **
## V7.Priv.house.serv -3.271e+00 2.440e+00 -1.341 0.180045
## V7.Prof.specialty 6.993e-01 1.540e-01 4.541 5.60e-06 ***
## V7.Protective.serv 1.032e+00 2.144e-01 4.812 1.49e-06 ***
## V7.Sales 6.145e-01 1.496e-01 4.106 4.02e-05 ***
## V7.Tech.support 7.322e-01 1.889e-01 3.876 0.000106 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.456e+00 1.601e-01 -9.091 < 2e-16 ***
## V8.Not.in.family -9.264e-01 4.390e-01 -2.110 0.034857 *
## V8.Other.relative -1.820e+00 3.930e-01 -4.629 3.67e-06 ***
## V8.Own.child -2.250e+00 4.251e-01 -5.293 1.20e-07 ***
## V8.Unmarried -1.128e+00 4.525e-01 -2.493 0.012654 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -4.874e-01 3.861e-01 -1.262 0.206910
## V9.Asian.Pac.Islander 4.647e-02 2.241e-01 0.207 0.835706
## V9.Black -1.506e-01 1.218e-01 -1.237 0.216218
## V9.Other -6.531e-01 4.634e-01 -1.409 0.158715
## V9.White NA NA NA NA
## V10.Female -8.174e-01 1.260e-01 -6.489 8.64e-11 ***
## V10.Male NA NA NA NA
## V11 3.215e-04 1.673e-05 19.219 < 2e-16 ***
## V12 6.916e-04 6.360e-05 10.875 < 2e-16 ***
## V13 2.419e-02 2.892e-03 8.366 < 2e-16 ***
## V14.. -4.034e-01 1.001e+00 -0.403 0.687010
## V14.Cambodia 1.621e+00 1.309e+00 1.239 0.215505
## V14.Canada 7.824e-01 1.077e+00 0.726 0.467623
## V14.China -9.027e-01 1.176e+00 -0.768 0.442732
## V14.Columbia -2.441e+01 5.758e+04 0.000 0.999662
## V14.Cuba 1.205e+00 1.123e+00 1.073 0.283390
## V14.Dominican.Republic -7.977e-01 1.486e+00 -0.537 0.591305
## V14.Ecuador -1.104e+00 1.607e+00 -0.687 0.492121
## V14.El.Salvador 8.271e-02 1.280e+00 0.065 0.948471
## V14.England 4.644e-01 1.095e+00 0.424 0.671504
## V14.France 1.409e+00 1.308e+00 1.078 0.281142
## V14.Germany 4.124e-01 1.048e+00 0.394 0.693942
## V14.Greece -1.840e+00 1.297e+00 -1.419 0.155846
## V14.Guatemala -2.407e+01 9.554e+04 0.000 0.999799
## V14.Haiti -6.195e-02 1.258e+00 -0.049 0.960732
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.330e+01 1.741e+05 0.000 0.999893
## V14.Hong -2.201e+01 2.388e+05 0.000 0.999926
## V14.Hungary -7.904e-01 1.531e+00 -0.516 0.605591
## V14.India -4.675e-01 1.080e+00 -0.433 0.665103
## V14.Iran 1.600e-02 1.127e+00 0.014 0.988670
## V14.Ireland 1.424e+00 1.373e+00 1.037 0.299660
## V14.Italy -6.461e-02 1.132e+00 -0.057 0.954474
## V14.Jamaica 1.388e-02 1.222e+00 0.011 0.990940
## V14.Japan 4.253e-01 1.167e+00 0.364 0.715511
## V14.Laos -2.419e+01 1.483e+05 0.000 0.999870
## V14.Mexico -7.942e-01 1.153e+00 -0.689 0.490896
## V14.Nicaragua -2.163e+01 1.614e+05 0.000 0.999893
## V14.Outlying.US.Guam.USVI.etc. -2.304e+01 1.849e+05 0.000 0.999901
## V14.Peru -1.213e+00 1.633e+00 -0.743 0.457765
## V14.Philippines 1.096e+00 1.052e+00 1.042 0.297548
## V14.Poland -1.086e-01 1.139e+00 -0.095 0.923996
## V14.Portugal 5.104e-01 1.356e+00 0.376 0.706689
## V14.Puerto.Rico -1.235e+00 1.178e+00 -1.048 0.294454
## V14.Scotland -5.170e-01 1.647e+00 -0.314 0.753623
## V14.South -1.024e+00 1.172e+00 -0.873 0.382456
## V14.Taiwan -4.563e-01 1.221e+00 -0.374 0.708564
## V14.Thailand -8.318e-01 1.654e+00 -0.503 0.614977
## V14.Trinadad.Tobago -4.545e-01 1.754e+00 -0.259 0.795488
## V14.United.States 1.502e-01 9.798e-01 0.153 0.878157
## V14.Vietnam -2.492e+00 1.491e+00 -1.671 0.094637 .
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13708.0 on 11633 degrees of freedom
## Residual deviance: 7850.4 on 11543 degrees of freedom
## AIC: 8032.4
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the Node3 fit, showing up to 50 features.
# The plot title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.50.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n3_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the Node3 model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the test-set labels.
ad_tda_kde_5.50.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6764 905
## >50K 652 1447
##
## Accuracy : 0.8406
## 95% CI : (0.8332, 0.8478)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5474
##
## Mcnemar's Test P-Value : 1.698e-10
##
## Sensitivity : 0.9121
## Specificity : 0.6152
## Pos Pred Value : 0.8820
## Neg Pred Value : 0.6894
## Prevalence : 0.7592
## Detection Rate : 0.6925
## Detection Prevalence : 0.7851
## Balanced Accuracy : 0.7637
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time (no new information).
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6764 905
## >50K 652 1447
##
## Accuracy : 0.8406
## 95% CI : (0.8332, 0.8478)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5474
##
## Mcnemar's Test P-Value : 1.698e-10
##
## Sensitivity : 0.9121
## Specificity : 0.6152
## Pos Pred Value : 0.8820
## Neg Pred Value : 0.6894
## Prevalence : 0.7592
## Detection Rate : 0.6925
## Detection Prevalence : 0.7851
## Balanced Accuracy : 0.7637
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node3 confusion matrix.
ad_tda_kde_5.50.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.406020e-01 5.474073e-01 8.331903e-01 8.478099e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.696017e-86 1.698345e-10
# Keep the overall accuracy (the first element, named "Accuracy").
ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.50.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9120820 0.6152211 0.8819924
## Neg Pred Value Precision Recall
## 0.6893759 0.8819924 0.9120820
## F1 Prevalence Detection Rate
## 0.8967849 0.7592138 0.6924652
## Detection Prevalence Balanced Accuracy
## 0.7851147 0.7636515
# Keep precision, recall and F1 (elements 5 to 7 of byClass).
ad_tda_kde_5.50.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian tests of the baseline LR vs. the Node3 TDA-assisted LR classifier
### 3-fold diff: per-fold accuracy of ad_lr_fit_re minus the Node3 model's
### (ad_lr_fit_re is presumably the non-TDA logistic regression -- confirm).
# NOTE(review): the values recorded after this statement match the Node2
# differences digit for digit although the Node3 fold accuracies differ, so
# they may not reflect the Node3 resamples -- re-knit and confirm.
diff_tda_kde_5.50.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n3_lr_fit_re
diff_tda_kde_5.50.5_lr_n3_3_fold
## Accuracy
## 1 0.007308687
## 2 0.007990909
## 3 0.007447916
## Bayesian tests on the Node3 fold-wise differences
# Bayesian sign test with the bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" against "right"; with both probabilities equal to 0, as in
# the recorded run, the ratio 0 / 0 is NaN.
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the same differences and bounds.
bsr_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the Node3 fold-wise differences.
# NOTE(review): if the second argument is the between-fold correlation, 0.1
# corresponds to 10-fold CV, whereas this model is resampled with 3 folds
# (which would suggest 1/3) -- confirm against the function definition.
bct_tda_kde_5.50.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n3_3_fold
## $left
## [1] 9.338029e-05
##
## $rope
## [1] 0.9950378
##
## $right
## [1] 0.004868857
# ROPE plot of the Node3 fold-wise differences for the range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.50.5_lr_n3_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_kde_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
# bf_tda_kde_5.50.5_lr.n3_3_fold
# One-sample t-test of the mean fold-wise difference against zero.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold)
## t = 36.433, df = 2, p-value = 0.0007525
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.006687034 0.008477974
## sample estimates:
## mean of x
## 0.007582504
### Test-set difference: baseline accuracy minus the Node3 model's accuracy.
# NOTE(review): the baseline is svm_cf_ov_acc, while the fold-wise comparison
# for this node subtracts from the LR resamples (ad_lr_fit_re) -- confirm that
# an SVM baseline is intended for an LR model.
diff_tda_kde_5.50.5_lr.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n3_test
## Accuracy
## -0.01279689
## Bayesian tests on the Node3 test-set difference (a single value)
# Bayesian sign test with the bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test; the ratio is Inf whenever
# probRight is 0, as in the recorded run.
bst_tda_kde_5.50.5_lr.n3_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n3_test[["probLeft"]] /
  bst_tda_kde_5.50.5_lr.n3_test[["probRight"]]
bst_tda_kde_5.50.5_lr.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the Node3 test-set difference.
# This must use the n3 difference and store into the n3 result: running it on
# the n2 difference repeats the Node2 test and overwrites
# bsr_tda_kde_5.50.5_lr.n2_test. The output recorded directly below was
# produced from the Node2 difference and needs to be regenerated.
bsr_tda_kde_5.50.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n3_test
## $winLeft
## [1] 0.8408
##
## $winRope
## [1] 0.1592
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the Node3 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right, presumably
# because a single difference has no sample variance -- confirm.
bct_tda_kde_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n3_test)))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n3_test)) #bf_tda_pca_5.50.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n3_test))
## Node4
# Logistic regression via caret (method "glm", binomial family) on the node-4
# data set, resampled with the shared `fitControl` and scored on Accuracy.
# The rendered warnings that follow report non-convergence and a
# rank-deficient fit, so individual coefficients should be read with caution.
Adult_TDA_KDE_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n4.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n4_LrFit0
## Generalized Linear Model
##
## 10038 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6692, 6691, 6693
## Resampling results:
##
## Accuracy Kappa
## 0.8565451 0.5372228
Adult_TDA_KDE_5.50.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8607292 0.5522725 Fold1
## 2 0.8547953 0.5272775 Fold2
## 3 0.8541106 0.5321185 Fold3
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame for the fold-level comparisons.
ad_tda_kde_5.50.5_n4_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n4_LrFit0$resample["Accuracy"]
# Coefficient table of the final node-4 model.
summary(Adult_TDA_KDE_5.50.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (19 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.260e+12 1.517e+13 -0.083 0.933819
## V1 7.282e-02 4.998e-03 14.569 < 2e-16 ***
## V2.. 7.797e+12 2.097e+13 0.372 0.710086
## V2.Federal.gov 7.797e+12 2.096e+13 0.372 0.709945
## V2.Local.gov 7.797e+12 2.098e+13 0.372 0.710183
## V2.Never.worked -4.496e+15 2.096e+13 -214.464 < 2e-16 ***
## V2.Private 7.797e+12 2.097e+13 0.372 0.710089
## V2.Self.emp.inc 7.797e+12 2.095e+13 0.372 0.709801
## V2.Self.emp.not.inc 7.797e+12 2.096e+13 0.372 0.709943
## V2.State.gov 7.797e+12 2.096e+13 0.372 0.709964
## V2.Without.pay -4.496e+15 2.099e+13 -214.207 < 2e-16 ***
## V3 1.580e-06 6.297e-07 2.509 0.012114 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -2.427e-01 4.229e-01 -0.574 0.566027
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 4.819e-01 1.934e-01 2.491 0.012730 *
## V4.Assoc.voc 4.399e-01 1.318e-01 3.337 0.000846 ***
## V4.Bachelors 1.112e+00 1.107e-01 10.048 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -2.108e-01 8.542e-02 -2.468 0.013604 *
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -3.636e-01 5.024e-01 -0.724 0.469302
## V6.Married.AF.spouse 2.500e+00 1.009e+00 2.478 0.013212 *
## V6.Married.civ.spouse 1.809e+00 6.901e-01 2.621 0.008767 **
## V6.Married.spouse.absent -1.916e-02 6.769e-01 -0.028 0.977419
## V6.Never.married -4.782e-01 5.110e-01 -0.936 0.349334
## V6.Separated -6.317e-01 5.843e-01 -1.081 0.279634
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.182e-01 1.760e-01 1.808 0.070658 .
## V7.Armed.Forces -1.233e+01 4.244e+02 -0.029 0.976824
## V7.Craft.repair 2.127e-01 1.511e-01 1.408 0.159250
## V7.Exec.managerial 9.917e-01 1.593e-01 6.226 4.80e-10 ***
## V7.Farming.fishing -8.439e-01 2.869e-01 -2.942 0.003263 **
## V7.Handlers.cleaners -5.814e-01 2.561e-01 -2.270 0.023184 *
## V7.Machine.op.inspct -7.993e-02 1.823e-01 -0.439 0.660993
## V7.Other.service -3.570e-01 2.137e-01 -1.670 0.094864 .
## V7.Priv.house.serv -1.786e+01 3.461e+03 -0.005 0.995883
## V7.Prof.specialty 8.173e-01 1.771e-01 4.614 3.96e-06 ***
## V7.Protective.serv 9.216e-01 2.343e-01 3.933 8.39e-05 ***
## V7.Sales 6.051e-01 1.617e-01 3.743 0.000182 ***
## V7.Tech.support 8.322e-01 2.095e-01 3.972 7.13e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.545e+00 2.175e-01 -7.105 1.20e-12 ***
## V8.Not.in.family -1.300e+00 5.183e-01 -2.508 0.012158 *
## V8.Other.relative -2.250e+00 5.009e-01 -4.492 7.06e-06 ***
## V8.Own.child -2.508e+00 4.923e-01 -5.093 3.52e-07 ***
## V8.Unmarried -1.627e+00 5.374e-01 -3.027 0.002473 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.181e+00 5.091e-01 -2.319 0.020394 *
## V9.Asian.Pac.Islander 4.669e-01 2.799e-01 1.668 0.095275 .
## V9.Black -3.876e-01 1.459e-01 -2.657 0.007890 **
## V9.Other -1.463e+00 6.704e-01 -2.183 0.029036 *
## V9.White NA NA NA NA
## V10.Female -8.349e-01 1.911e-01 -4.370 1.24e-05 ***
## V10.Male NA NA NA NA
## V11 3.266e-04 2.052e-05 15.916 < 2e-16 ***
## V12 5.521e-04 7.073e-05 7.805 5.93e-15 ***
## V13 3.011e-02 3.387e-03 8.890 < 2e-16 ***
## V14.. -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Cambodia -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Canada -6.537e+12 2.170e+13 -0.301 0.763212
## V14.China -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Columbia -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Cuba -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Dominican.Republic -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Ecuador -6.537e+12 2.170e+13 -0.301 0.763212
## V14.El.Salvador -6.537e+12 2.170e+13 -0.301 0.763212
## V14.England -6.537e+12 2.170e+13 -0.301 0.763212
## V14.France -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Germany -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Greece -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Guatemala -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Haiti -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Hong -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Hungary -6.537e+12 2.170e+13 -0.301 0.763212
## V14.India -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Iran -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Ireland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Italy -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Jamaica -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Japan -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Laos -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Mexico -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Nicaragua -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -4.510e+15 2.170e+13 -207.852 < 2e-16 ***
## V14.Peru -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Philippines -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Poland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Portugal -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Puerto.Rico -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Scotland -6.537e+12 2.170e+13 -0.301 0.763212
## V14.South -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Taiwan -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Thailand -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Trinadad.Tobago -6.537e+12 2.170e+13 -0.301 0.763212
## V14.United.States -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Vietnam -6.537e+12 2.170e+13 -0.301 0.763212
## V14.Yugoslavia -6.537e+12 2.170e+13 -0.301 0.763212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 10294.3 on 10037 degrees of freedom
## Residual deviance: 5791.2 on 9948 degrees of freedom
## AIC: 5971.2
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 LR fit, requesting 50 features.
vip(Adult_TDA_KDE_5.50.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n4_Lr1Fit TDA-Assited LR")

# Predict on the test data `adult.one_hot_df4Test` with the node-4 model.
# `pred0` is a scratch name that is overwritten for every node.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-4 predictions against the test-set labels.
ad_tda_kde_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6609 851
## >50K 807 1501
##
## Accuracy : 0.8303
## 95% CI : (0.8227, 0.8377)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5328
##
## Mcnemar's Test P-Value : 0.291
##
## Sensitivity : 0.8912
## Specificity : 0.6382
## Pos Pred Value : 0.8859
## Neg Pred Value : 0.6503
## Prevalence : 0.7592
## Detection Rate : 0.6766
## Detection Prevalence : 0.7637
## Balanced Accuracy : 0.7647
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6609 851
## >50K 807 1501
##
## Accuracy : 0.8303
## 95% CI : (0.8227, 0.8377)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5328
##
## Mcnemar's Test P-Value : 0.291
##
## Sensitivity : 0.8912
## Specificity : 0.6382
## Pos Pred Value : 0.8859
## Neg Pred Value : 0.6503
## Prevalence : 0.7592
## Detection Rate : 0.6766
## Detection Prevalence : 0.7637
## Balanced Accuracy : 0.7647
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.302621e-01 5.327644e-01 8.226681e-01 8.376586e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.475347e-65 2.909546e-01
# Overall test accuracy (first element of $overall), kept as a named scalar.
ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n4_lr_cf0$overall["Accuracy"]
# Per-class statistics for the node-4 confusion matrix.
ad_tda_kde_5.50.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8911812 0.6381803 0.8859249
## Neg Pred Value Precision Recall
## 0.6503466 0.8859249 0.8911812
## F1 Prevalence Detection Rate
## 0.8885453 0.7592138 0.6765971
## Detection Prevalence Balanced Accuracy
## 0.7637183 0.7646808
# Precision, Recall and F1 (elements 5:7 of $byClass) for node 4.
ad_tda_kde_5.50.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_lr_fit_re (presumably the non-TDA LR
# baseline defined earlier -- verify) minus the node-4 TDA-assisted LR folds.
diff_tda_kde_5.50.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n4_lr_fit_re
diff_tda_kde_5.50.5_lr_n4_3_fold
## Accuracy
## 1 -0.009603785
## 2 -0.005361278
## 3 -0.004544938
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Node-4 fold differences, evaluated with bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result above. No guard exists
# for a zero denominator: 0 / 0 evaluates to NaN in R and x / 0 to Inf, so the
# stored odds may be non-finite.
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_lr.n4_3_fold$probLeft/bst_tda_kde_5.50.5_lr.n4_3_fold$probRight
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Node-4 fold differences, same bounds as the sign test.
bsr_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Node-4 fold differences; 0.1 is the second positional argument (presumably
# the correlation between folds -- TODO confirm), followed by the bounds.
bct_tda_kde_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_3_fold
## $left
## [1] 0.09659779
##
## $rope
## [1] 0.8974904
##
## $right
## [1] 0.005911817
# Rope Plot
# Plots the rope() summary of the node-4 per-fold differences over the
# interval c(-0.01, 0.01).
plot(rope(diff_tda_kde_5.50.5_lr_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
# (the ttestBF comparison below is left disabled)
#bf_tda_kde_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
#bf_tda_kde_5.50.5_lr.n4_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero. The call is
# kept verbatim because t.test() echoes the argument expression in its output.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold)
## t = -4.1474, df = 2, p-value = 0.05351
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.013250044 0.000243377
## sample estimates:
## mean of x
## -0.006503333
### Test set diff
# Single test-set accuracy difference for node 4 (reference minus TDA-assisted).
# NOTE(review): the reference accuracy here is svm_cf_ov_acc, while the fold
# differences in this LR section use ad_lr_fit_re -- confirm that the SVM
# accuracy is the intended baseline for a logistic-regression comparison.
diff_tda_kde_5.50.5_lr.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n4_test
## Accuracy
## -0.002457002
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the one-element node-4 test-set difference with bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test),
  -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result above. No guard exists
# for a zero denominator: 0 / 0 evaluates to NaN in R and x / 0 to Inf, so the
# stored odds may be non-finite.
bst_tda_kde_5.50.5_lr.n4_test_odds.left<-bst_tda_kde_5.50.5_lr.n4_test$probLeft/bst_tda_kde_5.50.5_lr.n4_test$probRight
bst_tda_kde_5.50.5_lr.n4_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Node-4 test-set difference, same bounds as the sign test.
bsr_tda_kde_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test),
  -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Same call shape as the fold-level test, but on the single test-set difference.
# The rendered result is NA for left/rope/right -- presumably because one
# observation gives no variance estimate (TODO confirm in the function source).
bct_tda_kde_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n4_test)))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n4_test)) #bf_tda_pca_5.50.5_lr.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n4_test))
## Node5
# Logistic regression via caret (method "glm", binomial family) on the node-5
# data set, resampled with the shared `fitControl` and scored on Accuracy.
# The rendered warnings that follow report non-convergence and a
# rank-deficient fit, so individual coefficients should be read with caution.
Adult_TDA_KDE_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n5_LrFit0
## Generalized Linear Model
##
## 7540 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5026, 5028, 5026
## Resampling results:
##
## Accuracy Kappa
## 0.8661809 0.386747
Adult_TDA_KDE_5.50.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8659507 0.3891890 Fold1
## 2 0.8682325 0.3875662 Fold2
## 3 0.8643596 0.3834859 Fold3
# Keep the per-fold Accuracy column (first column of $resample) as a
# one-column data frame for the fold-level comparisons.
ad_tda_kde_5.50.5_n5_lr_fit_re <-
  Adult_TDA_KDE_5.50.5_n5_LrFit0$resample["Accuracy"]
# Coefficient table of the final node-5 model.
summary(Adult_TDA_KDE_5.50.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (23 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.531e+13 1.949e+13 -0.786 0.432144
## V1 7.835e-02 6.899e-03 11.357 < 2e-16 ***
## V2.. 1.531e+13 1.949e+13 0.786 0.432144
## V2.Federal.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Local.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Never.worked -4.488e+15 1.949e+13 -230.295 < 2e-16 ***
## V2.Private 1.531e+13 1.949e+13 0.786 0.432144
## V2.Self.emp.inc 1.531e+13 1.949e+13 0.786 0.432144
## V2.Self.emp.not.inc 1.531e+13 1.949e+13 0.786 0.432144
## V2.State.gov 1.531e+13 1.949e+13 0.786 0.432144
## V2.Without.pay 1.531e+13 1.949e+13 0.786 0.432144
## V3 2.099e-06 9.540e-07 2.200 0.027778 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th NA NA NA NA
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm NA NA NA NA
## V4.Assoc.voc 4.025e-01 1.816e-01 2.216 0.026663 *
## V4.Bachelors NA NA NA NA
## V4.Doctorate NA NA NA NA
## V4.HS.grad -3.119e-01 8.559e-02 -3.644 0.000268 ***
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.283e-01 1.066e+00 -0.214 0.830365
## V6.Married.AF.spouse 2.486e+00 1.507e+00 1.650 0.098997 .
## V6.Married.civ.spouse 2.141e+00 1.174e+00 1.825 0.068066 .
## V6.Married.spouse.absent -2.802e-01 1.305e+00 -0.215 0.829953
## V6.Never.married -5.760e-01 1.068e+00 -0.539 0.589690
## V6.Separated -4.894e-01 1.139e+00 -0.430 0.667446
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.736e-01 2.086e-01 1.791 0.073227 .
## V7.Armed.Forces -2.274e+01 1.525e+05 0.000 0.999881
## V7.Craft.repair 2.768e-01 1.663e-01 1.664 0.096174 .
## V7.Exec.managerial 8.350e-01 1.840e-01 4.537 5.69e-06 ***
## V7.Farming.fishing -3.688e-01 3.161e-01 -1.167 0.243340
## V7.Handlers.cleaners -4.164e-01 2.677e-01 -1.556 0.119827
## V7.Machine.op.inspct 6.750e-02 1.994e-01 0.339 0.734898
## V7.Other.service -3.180e-01 2.459e-01 -1.293 0.196035
## V7.Priv.house.serv -2.393e+01 7.364e+04 0.000 0.999741
## V7.Prof.specialty 1.189e+00 2.334e-01 5.093 3.52e-07 ***
## V7.Protective.serv 8.446e-01 2.740e-01 3.082 0.002056 **
## V7.Sales 4.039e-01 1.859e-01 2.173 0.029765 *
## V7.Tech.support 1.113e+00 2.595e-01 4.291 1.78e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.278e+00 3.035e-01 -4.212 2.53e-05 ***
## V8.Not.in.family -7.738e-01 6.013e-01 -1.287 0.198168
## V8.Other.relative -1.731e+00 6.119e-01 -2.830 0.004662 **
## V8.Own.child -1.756e+00 5.661e-01 -3.101 0.001926 **
## V8.Unmarried -1.727e+00 6.715e-01 -2.573 0.010095 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.486e+00 5.768e-01 -2.576 0.009981 **
## V9.Asian.Pac.Islander 5.260e-01 4.353e-01 1.208 0.226894
## V9.Black -5.663e-01 1.998e-01 -2.834 0.004591 **
## V9.Other -2.357e+00 1.110e+00 -2.124 0.033646 *
## V9.White NA NA NA NA
## V10.Female -6.602e-01 2.631e-01 -2.509 0.012094 *
## V10.Male NA NA NA NA
## V11 3.697e-04 2.660e-05 13.899 < 2e-16 ***
## V12 5.408e-04 8.660e-05 6.244 4.26e-10 ***
## V13 2.854e-02 4.059e-03 7.032 2.04e-12 ***
## V14.. -1.407e+00 1.435e+00 -0.980 0.326858
## V14.Cambodia -2.607e+01 1.507e+05 0.000 0.999862
## V14.Canada -1.840e+00 1.800e+00 -1.022 0.306601
## V14.China -1.393e+00 1.707e+00 -0.816 0.414689
## V14.Columbia -2.562e+01 8.344e+04 0.000 0.999755
## V14.Cuba 3.551e-01 1.558e+00 0.228 0.819681
## V14.Dominican.Republic -1.727e+01 1.399e+03 -0.012 0.990148
## V14.Ecuador -1.909e-03 1.691e+00 -0.001 0.999099
## V14.El.Salvador -2.776e+00 1.783e+00 -1.557 0.119535
## V14.England -6.973e-01 1.660e+00 -0.420 0.674469
## V14.France -2.309e+01 1.625e+05 0.000 0.999887
## V14.Germany -8.534e-01 1.495e+00 -0.571 0.568216
## V14.Greece -1.636e+00 2.115e+00 -0.774 0.439051
## V14.Guatemala -3.395e-01 1.813e+00 -0.187 0.851448
## V14.Haiti -2.321e+01 1.092e+05 0.000 0.999830
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.470e+01 2.452e+05 0.000 0.999920
## V14.Hong -2.668e+01 1.398e+05 0.000 0.999848
## V14.Hungary -2.539e+01 3.120e+05 0.000 0.999935
## V14.India -2.833e+00 2.209e+00 -1.282 0.199724
## V14.Iran 2.252e-01 1.758e+00 0.128 0.898083
## V14.Ireland -1.216e+00 1.790e+00 -0.679 0.496897
## V14.Italy -2.567e+00 1.866e+00 -1.376 0.168826
## V14.Jamaica -1.321e-01 1.564e+00 -0.084 0.932709
## V14.Japan -2.613e+01 8.646e+04 0.000 0.999759
## V14.Laos -2.666e+01 1.732e+05 0.000 0.999877
## V14.Mexico -1.881e+00 1.498e+00 -1.255 0.209357
## V14.Nicaragua -2.527e+01 1.007e+05 0.000 0.999800
## V14.Outlying.US.Guam.USVI.etc. -2.399e+01 1.271e+05 0.000 0.999849
## V14.Peru -2.576e+01 1.247e+05 0.000 0.999835
## V14.Philippines -7.225e-01 1.581e+00 -0.457 0.647625
## V14.Poland -1.867e+00 1.771e+00 -1.054 0.291869
## V14.Portugal -6.629e-01 1.856e+00 -0.357 0.720983
## V14.Puerto.Rico -2.366e+00 1.781e+00 -1.329 0.183994
## V14.Scotland -2.715e+01 2.557e+05 0.000 0.999915
## V14.South -1.428e+00 1.710e+00 -0.835 0.403544
## V14.Taiwan -2.466e+00 1.940e+00 -1.271 0.203759
## V14.Thailand -2.201e+00 2.037e+00 -1.080 0.280071
## V14.Trinadad.Tobago -2.743e+01 2.557e+05 0.000 0.999914
## V14.United.States -1.026e+00 1.386e+00 -0.740 0.459267
## V14.Vietnam -2.153e+00 1.842e+00 -1.169 0.242491
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6474.2 on 7539 degrees of freedom
## Residual deviance: 4003.6 on 7454 degrees of freedom
## AIC: 4175.6
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 LR fit, requesting 50 features.
vip(Adult_TDA_KDE_5.50.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.50.5_n5_Lr1Fit TDA-Assited LR")

# Predict on the test data `adult.one_hot_df4Test` with the node-5 model.
# `pred0` is a scratch name that is overwritten for every node.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-5 predictions against the test-set labels.
ad_tda_kde_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6503 882
## >50K 913 1470
##
## Accuracy : 0.8162
## 95% CI : (0.8084, 0.8239)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.4996
##
## Mcnemar's Test P-Value : 0.4789
##
## Sensitivity : 0.8769
## Specificity : 0.6250
## Pos Pred Value : 0.8806
## Neg Pred Value : 0.6169
## Prevalence : 0.7592
## Detection Rate : 0.6657
## Detection Prevalence : 0.7560
## Balanced Accuracy : 0.7509
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6503 882
## >50K 913 1470
##
## Accuracy : 0.8162
## 95% CI : (0.8084, 0.8239)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.4996
##
## Mcnemar's Test P-Value : 0.4789
##
## Sensitivity : 0.8769
## Specificity : 0.6250
## Pos Pred Value : 0.8806
## Neg Pred Value : 0.6169
## Prevalence : 0.7592
## Detection Rate : 0.6657
## Detection Prevalence : 0.7560
## Balanced Accuracy : 0.7509
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.162367e-01 4.996394e-01 8.084112e-01 8.238731e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.955418e-42 4.788888e-01
# Overall test accuracy (first element of $overall), kept as a named scalar.
ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc <-
  ad_tda_kde_5.50.5_n5_lr_cf0$overall["Accuracy"]
# Per-class statistics for the node-5 confusion matrix.
ad_tda_kde_5.50.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8768878 0.6250000 0.8805687
## Neg Pred Value Precision Recall
## 0.6168695 0.8805687 0.8768878
## F1 Prevalence Detection Rate
## 0.8787244 0.7592138 0.6657453
## Detection Prevalence Balanced Accuracy
## 0.7560401 0.7509439
# Precision, Recall and F1 (elements 5:7 of $byClass) for node 5.
ad_tda_kde_5.50.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_lr_fit_re (presumably the non-TDA LR
# baseline defined earlier -- verify) minus the node-5 TDA-assisted LR folds.
diff_tda_kde_5.50.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.50.5_n5_lr_fit_re
diff_tda_kde_5.50.5_lr_n5_3_fold
## Accuracy
## 1 -0.01482523
## 2 -0.01879842
## 3 -0.01479391
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Node-5 fold differences, evaluated with bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result above. No guard exists
# for a zero denominator: x / 0 evaluates to Inf in R and 0 / 0 to NaN, so the
# stored odds may be non-finite.
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_lr.n5_3_fold$probLeft/bst_tda_kde_5.50.5_lr.n5_3_fold$probRight
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Node-5 fold differences, same bounds as the sign test.
bsr_tda_kde_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.8329
##
## $winRope
## [1] 0.1671
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Node-5 fold differences; 0.1 is the second positional argument (presumably
# the correlation between folds -- TODO confirm), followed by the bounds.
bct_tda_kde_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9713858
##
## $rope
## [1] 0.02689802
##
## $right
## [1] 0.001716158
# Rope Plot
# Plots the rope() summary of the node-5 per-fold differences over the
# interval c(-0.01, 0.01).
plot(rope(diff_tda_kde_5.50.5_lr_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
# (the ttestBF comparison below is left disabled)
#bf_tda_kde_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
#bf_tda_kde_5.50.5_lr.n5_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero. The call is
# kept verbatim because t.test() echoes the argument expression in its output.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold)
## t = -12.138, df = 2, p-value = 0.006719
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02186020 -0.01041818
## sample estimates:
## mean of x
## -0.01613919
### Test set diff
# Single test-set accuracy difference for node 5 (reference minus TDA-assisted).
# NOTE(review): the reference accuracy here is svm_cf_ov_acc, while the fold
# differences in this LR section use ad_lr_fit_re -- confirm that the SVM
# accuracy is the intended baseline for a logistic-regression comparison.
diff_tda_kde_5.50.5_lr.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n5_test
## Accuracy
## 0.01156839
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the one-element node-5 test-set difference with bounds -0.01 / 0.01.
bst_tda_kde_5.50.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test),
  -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign-test result above. No guard exists
# for a zero denominator: x / 0 evaluates to Inf in R and 0 / 0 to NaN, so the
# stored odds may be non-finite for other inputs.
bst_tda_kde_5.50.5_lr.n5_test_odds.left<-bst_tda_kde_5.50.5_lr.n5_test$probLeft/bst_tda_kde_5.50.5_lr.n5_test$probRight
bst_tda_kde_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Node-5 test-set difference, same bounds as the sign test.
bsr_tda_kde_5.50.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test),
  -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4549333
##
## $winRight
## [1] 0.5450667
# Bayesian Correlated Test
# Same call shape as the fold-level test, but on the single test-set difference.
# The rendered result is NA for left/rope/right -- presumably because one
# observation gives no variance estimate (TODO confirm in the function source).
bct_tda_kde_5.50.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n5_test)))
#BayesFactor
#bf_tda_kde_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n5_test)) #bf_tda_pca_5.50.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n5_test))
# naiveBayes
# Baseline naive Bayes classifier (caret method "nb") trained on
# `adult.one_hot_df4Train`, resampled with `fitControl`, scored on Accuracy.
# NOTE(review): the rendered warnings that follow show every
# usekernel = FALSE fit failing with "Zero variances", so only the
# usekernel = TRUE candidate contributes resampling results.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
adultNbFit
## Naive Bayes
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7606282 0.00909498
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
adultNbFit$resample
## Accuracy Kappa Resample
## 1 0.7603317 0.007448036 Fold1
## 2 0.7601685 0.005800114 Fold2
## 3 0.7613846 0.014036789 Fold3
# Per-fold Accuracy column (first column of $resample) of the baseline NB fit,
# kept as a one-column data frame.
ad_nb_fit_re <- adultNbFit$resample["Accuracy"]
# Structural summary of the fitted object.
summary(adultNbFit)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
#varImp (adultNbFit)
# Score the test rows with the naive Bayes model fitted on the full training
# data (adultNbFit).
predictions <- predict(
  adultNbFit,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predicted labels against the observed adult_df1 labels of
# the test set, then print the confusion matrix with its summary statistics.
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2194
## >50K 0 158
##
## Accuracy : 0.7754
## 95% CI : (0.767, 0.7836)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 8.684e-05
##
## Kappa : 0.0986
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06718
## Pos Pred Value : 0.77170
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98382
## Balanced Accuracy : 0.53359
##
## 'Positive' Class : <=50K
##
nb_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.753890e-01 9.857036e-02 7.669804e-01 7.836332e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 8.684169e-05 0.000000e+00
nb_cf_ov_acc<-nb_cf$overall[1]
nb_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.06717687 0.77169615
## Neg Pred Value Precision Recall
## 1.00000000 0.77169615 1.00000000
## F1 Prevalence Detection Rate
## 0.87113826 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.98382473 0.53358844
nb_cf_pre_rec_f1<-nb_cf$byClass[5:7]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit naive Bayes ("nb") on the node-1 data (tda.m_adult_5.50.5.n1.vec), using
# every other column as a predictor of the adult_df1 class. Resampling follows
# the shared fitControl object and candidates are ranked by accuracy.
Adult_TDA_PC_5.50.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Canada, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n1_NbFit0
## Naive Bayes
##
## 4917 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 3278, 3278, 3278
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9733577 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9737645 0 Fold1
## 2 0.9731544 0 Fold2
## 3 0.9731544 0 Fold3
ad_tda_pc_5.50.5_n1_nb_fit_re<-Adult_TDA_PC_5.50.5_n1_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the common test set with the node-1 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the node-1 predictions with the observed adult_df1 labels of the test
# set and print the confusion matrix with its summary statistics.
ad_tda_pc_5.50.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_nb_cf0$overall[1]
# Per-class statistics of the node-1 confusion matrix.
# The element name was previously misspelled `byClas1`, which matches nothing
# in the object, so this chunk printed NULL. Re-knit to show the real values.
# NOTE(review): Pos Pred Value is NaN for this model (no test row is predicted
# as the positive class), so Precision and F1 are presumably undefined as well.
ad_tda_pc_5.50.5_n1_nb_cf0$byClass
# Elements 5:7 of byClass are Precision, Recall and F1, in that order.
ad_tda_pc_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted vs. TDA-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n1_nb_fit_re)
diff_tda_pca_5.50.5_nb_n1_3_fold
## Accuracy
## 1 -0.2134328
## 2 -0.2129859
## 3 -0.2117698
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight. When probRight is 0, as
# it is for this comparison, the ratio evaluates to Inf rather than a finite
# odds value.
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n1_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n1_3_fold$probRight
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0.9911667
##
## $winRope
## [1] 0.008833333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the per-fold accuracy differences; the last two arguments are the
# same -0.01 / 0.01 bounds used for the ROPE in the other tests.
# NOTE(review): the second argument (0.1) is presumably the correlation between
# resampling folds - confirm against the function's definition. If it follows
# the usual 1/(number of folds) heuristic, the 3-fold resampling used here
# would call for 1/3 rather than 0.1.
bct_tda_pca_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_3_fold
## $left
## [1] 0.999996
##
## $rope
## [1] 6.869588e-07
##
## $right
## [1] 3.318044e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
#bf_tda_pca_5.50.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold)
## t = -428.12, df = 2, p-value = 5.456e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2148675 -0.2105915
## sample estimates:
## mean of x
## -0.2127295
### Test set diff
diff_tda_pca_5.50.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n1_test
## Accuracy
## 0.5346028
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n1_test_odds.left<-bst_tda_pca_5.50.5_nb.n1_test$probLeft/bst_tda_pca_5.50.5_nb.n1_test$probRight
bst_tda_pca_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1592667
##
## $winRight
## [1] 0.8407333
# Bayesian Correlated Test
# The input here is a single test-set accuracy difference, and every element
# of the result (left, rope, right) prints as NA.
# NOTE(review): presumably the test cannot estimate a spread from one value -
# confirm; if so, this step carries no information for test-set comparisons.
bct_tda_pca_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n1_test)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n1_test)) #bf_tda_pca_5.50.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n1_test))
##Node2
# Fit naive Bayes ("nb") on the node-2 data (tda.m_adult_5.50.5.n2.vec), using
# every other column as a predictor of the adult_df1 class. Resampling follows
# the shared fitControl object and candidates are ranked by accuracy.
Adult_TDA_PC_5.50.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n2_NbFit0
## Naive Bayes
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8136, 8138, 8138
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.5471151 0.1056197
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.5039312 0.081902576 Fold1
## 2 0.5919371 0.233191922 Fold2
## 3 0.5454769 0.001764559 Fold3
ad_tda_pc_5.50.5_n2_nb_fit_re<-Adult_TDA_PC_5.50.5_n2_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the common test set with the node-2 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the node-2 predictions with the observed adult_df1 labels of the test
# set and print the confusion matrix with its summary statistics.
ad_tda_pc_5.50.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3598 1573
## >50K 3818 779
##
## Accuracy : 0.4481
## 95% CI : (0.4382, 0.458)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1385
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4852
## Specificity : 0.3312
## Pos Pred Value : 0.6958
## Neg Pred Value : 0.1695
## Prevalence : 0.7592
## Detection Rate : 0.3683
## Detection Prevalence : 0.5294
## Balanced Accuracy : 0.4082
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3598 1573
## >50K 3818 779
##
## Accuracy : 0.4481
## 95% CI : (0.4382, 0.458)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1385
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4852
## Specificity : 0.3312
## Pos Pred Value : 0.6958
## Neg Pred Value : 0.1695
## Prevalence : 0.7592
## Detection Rate : 0.3683
## Detection Prevalence : 0.5294
## Balanced Accuracy : 0.4082
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.480958e-01 -1.384920e-01 4.381994e-01 4.580232e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.863576e-205
ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.4851672 0.3312075 0.6958035
## Neg Pred Value Precision Recall
## 0.1694583 0.6958035 0.4851672
## F1 Prevalence Detection Rate
## 0.5717010 0.7592138 0.3683456
## Detection Prevalence Balanced Accuracy
## 0.5293817 0.4081873
ad_tda_pc_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted vs. TDA-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n2_nb_fit_re)
diff_tda_pca_5.50.5_nb_n2_3_fold
## Accuracy
## 1 0.2564005
## 2 0.1682314
## 3 0.2159077
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n2_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n2_3_fold$probRight
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008766667
##
## $winRight
## [1] 0.9912333
# Bayesian Correlated Test
# Applied to the per-fold accuracy differences; the last two arguments are the
# same -0.01 / 0.01 bounds used for the ROPE in the other tests.
# NOTE(review): the second argument (0.1) is presumably the correlation between
# resampling folds - confirm against the function's definition. If it follows
# the usual 1/(number of folds) heuristic, the 3-fold resampling used here
# would call for 1/3 rather than 0.1.
bct_tda_pca_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_3_fold
## $left
## [1] 0.008445021
##
## $rope
## [1] 0.001688794
##
## $right
## [1] 0.9898662
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
#bf_tda_pca_5.50.5_nb.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold)
## t = 8.3795, df = 2, p-value = 0.01394
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1038801 0.3231463
## sample estimates:
## mean of x
## 0.2135132
### Test set diff
diff_tda_pca_5.50.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n2_test
## Accuracy
## 0.3272932
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n2_test_odds.left<-bst_tda_pca_5.50.5_nb.n2_test$probLeft/bst_tda_pca_5.50.5_nb.n2_test$probRight
bst_tda_pca_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1624
##
## $winRight
## [1] 0.8376
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n2_test)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n2_test)) #bf_tda_pca_5.50.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n2_test))
##Node3
# Fit naive Bayes ("nb") on the node-3 data (tda.m_adult_5.50.5.n3.vec), using
# every other column as a predictor of the adult_df1 class. Resampling follows
# the shared fitControl object and candidates are ranked by accuracy.
Adult_TDA_PC_5.50.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n3_NbFit0
## Naive Bayes
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8827, 8826
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7719034 0.003051974
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7729436 0.009155921 Fold1
## 2 0.7713574 0.000000000 Fold2
## 3 0.7714092 0.000000000 Fold3
ad_tda_pc_5.50.5_n3_nb_fit_re<-Adult_TDA_PC_5.50.5_n3_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the common test set with the node-3 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the node-3 predictions with the observed adult_df1 labels of the test
# set and print the confusion matrix with its summary statistics.
ad_tda_pc_5.50.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2339
## >50K 0 13
##
## Accuracy : 0.7605
## 95% CI : (0.752, 0.769)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3844
##
## Kappa : 0.0084
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.005527
## Pos Pred Value : 0.760226
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.998669
## Balanced Accuracy : 0.502764
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2339
## >50K 0 13
##
## Accuracy : 0.7605
## 95% CI : (0.752, 0.769)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3844
##
## Kappa : 0.0084
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.005527
## Pos Pred Value : 0.760226
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.998669
## Balanced Accuracy : 0.502764
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.760544636 0.008368689 0.751953582 0.768980076 0.759213759
## AccuracyPValue McnemarPValue
## 0.384402497 0.000000000
ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.000000000 0.005527211 0.760225525
## Neg Pred Value Precision Recall
## 1.000000000 0.760225525 1.000000000
## F1 Prevalence Detection Rate
## 0.863781958 0.759213759 0.759213759
## Detection Prevalence Balanced Accuracy
## 0.998669124 0.502763605
ad_tda_pc_5.50.5_n3_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-TDA-assisted vs. TDA-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_pca_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n3_nb_fit_re)
diff_tda_pca_5.50.5_nb_n3_3_fold
## Accuracy
## 1 -0.01261191
## 2 -0.01118887
## 3 -0.01002458
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n3_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n3_3_fold$probRight
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.8343333
##
## $winRope
## [1] 0.1656667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the per-fold accuracy differences; the last two arguments are the
# same -0.01 / 0.01 bounds used for the ROPE in the other tests.
# NOTE(review): the second argument (0.1) is presumably the correlation between
# resampling folds - confirm against the function's definition. If it follows
# the usual 1/(number of folds) heuristic, the 3-fold resampling used here
# would call for 1/3 rather than 0.1.
bct_tda_pca_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_3_fold
## $left
## [1] 0.8610332
##
## $rope
## [1] 0.1381444
##
## $right
## [1] 0.0008223578
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
#bf_tda_pca_5.50.5_nb.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold)
## t = -15.071, df = 2, p-value = 0.004374
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.014494114 -0.008056121
## sample estimates:
## mean of x
## -0.01127512
### Test set diff
diff_tda_pca_5.50.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n3_test
## Accuracy
## 0.01484439
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n3_test_odds.left<-bst_tda_pca_5.50.5_nb.n3_test$probLeft/bst_tda_pca_5.50.5_nb.n3_test$probRight
bst_tda_pca_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Runs the signed-rank test on the node-3 test-set accuracy difference with the
# same -0.01 / 0.01 bounds as the other tests in this section.
# This call previously reused the node-2 names (input
# diff_tda_pca_5.50.5_nb.n2_test, result bsr_tda_pca_5.50.5_nb.n2_test), so it
# overwrote the node-2 result and never created the node-3 one. The printed
# output that follows predates the fix and must be regenerated by re-knitting.
bsr_tda_pca_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1605333
##
## $winRight
## [1] 0.8394667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n3_test)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n3_test)) #bf_tda_pca_5.50.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n3_test))
## Node 4
# Fit a naive Bayes classifier (method 'nb') on the Node4 training subset,
# resampled according to fitControl and tuned on accuracy.
Adult_TDA_PC_5.50.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.France, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n4_NbFit0
## Naive Bayes
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9449102 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9448536 0 Fold1
## 2 0.9450234 0 Fold2
## 3 0.9448536 0 Fold3
ad_tda_pc_5.50.5_n4_nb_fit_re<-Adult_TDA_PC_5.50.5_n4_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the Node4 naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels to assess performance
ad_tda_pc_5.50.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.50.5_n4_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: baseline resamples minus the Node4 TDA-assisted
# resamples, so negative values mean the Node4 model scored higher in cross-validation.
# NOTE(review): ad_nb_fit_re is presumably the baseline naive Bayes resample accuracies -- confirm where it is defined.
# NOTE(review): the Node4 model was resampled on its own 16700-row subset, so the folds
# are presumably not the same partitions as the baseline's -- confirm a paired comparison is intended.
diff_tda_pca_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n4_nb_fit_re)
diff_tda_pca_5.50.5_nb_n4_3_fold
## Accuracy
## 1 -0.1845219
## 2 -0.1848549
## 3 -0.1834690
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to probRight. When probRight is 0 the
# division yields Inf (as in the output below), so treat the value as "unbounded"
# rather than as a finite odds estimate.
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n4_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n4_3_fold$probRight
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9914
##
## $winRope
## [1] 0.0086
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9999962
##
## $rope
## [1] 7.47709e-07
##
## $right
## [1] 3.081056e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
#bf_tda_pca_5.50.5_nb.n4_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold)
## t = -441.22, df = 2, p-value = 5.137e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1860790 -0.1824849
## sample estimates:
## mean of x
## -0.1842819
### Test set diff
diff_tda_pca_5.50.5_nb.n4_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n4_test
## Accuracy
## 0.01617527
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n4_test_odds.left<-bst_tda_pca_5.50.5_nb.n4_test$probLeft/bst_tda_pca_5.50.5_nb.n4_test$probRight
bst_tda_pca_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4590333
##
## $winRight
## [1] 0.5409667
# Bayesian Correlated Test
# The test-set difference is a single accuracy value, and this call returns NA
# for left/rope/right (see the output below).
# NOTE(review): presumably a variance cannot be estimated from one observation -- confirm,
# and consider dropping this test for the single-value test-set comparison.
bct_tda_pca_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n4_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n4_test))
#bf_tda_pca_5.50.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n4_test))
## Node 5
# Fit a naive Bayes classifier (method 'nb') on the Node5 training subset,
# resampled according to fitControl and tuned on accuracy.
Adult_TDA_PC_5.50.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n5_NbFit0
## Naive Bayes
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9603, 9602, 9603
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9979867 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.50.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9981254 0 Fold1
## 2 0.9979175 0 Fold2
## 3 0.9979171 0 Fold3
ad_tda_pc_5.50.5_n5_nb_fit_re<-Adult_TDA_PC_5.50.5_n5_NbFit0$resample[1]
summary(Adult_TDA_PC_5.50.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the Node5 naive Bayes model
pred0 <- predict(
  Adult_TDA_PC_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels to assess performance
ad_tda_pc_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.50.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.50.5_n5_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: baseline resamples minus the Node5 TDA-assisted
# resamples, so negative values mean the Node5 model scored higher in cross-validation.
# NOTE(review): ad_nb_fit_re is presumably the baseline naive Bayes resample accuracies -- confirm where it is defined.
diff_tda_pca_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n5_nb_fit_re)
diff_tda_pca_5.50.5_nb_n5_3_fold
## Accuracy
## 1 -0.2377937
## 2 -0.2377490
## 3 -0.2365325
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n5_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n5_3_fold$probRight
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Fixed copy-paste error: the Node5 result was previously computed from the
# Node4 differences (diff_tda_pca_5.50.5_nb_n4_3_fold). It now uses the Node5
# per-fold differences, like the other Node5 tests in this section.
# The knitted output printed below predates this fix; re-knit to refresh it.
bsr_tda_pca_5.50.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9904667
##
## $winRope
## [1] 0.009533333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9999978
##
## $rope
## [1] 3.415999e-07
##
## $right
## [1] 1.859851e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
#bf_tda_pca_5.50.5_nb.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold)
## t = -574.5, df = 2, p-value = 3.03e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2391361 -0.2355808
## sample estimates:
## mean of x
## -0.2373584
### Test set diff
diff_tda_pca_5.50.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n5_test
## Accuracy
## 0.01617527
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.50.5_nb.n5_test_odds.left<-bst_tda_pca_5.50.5_nb.n5_test$probLeft/bst_tda_pca_5.50.5_nb.n5_test$probRight
bst_tda_pca_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4539333
##
## $winRight
## [1] 0.5460667
# Bayesian Correlated Test
bct_tda_pca_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n5_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n5_test))
#bf_tda_pca_5.50.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n5_test))
## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
## Node 1
# Fit a naive Bayes classifier (method 'nb') on the KDE-filter Node1 training
# subset, resampled according to fitControl and tuned on accuracy.
Adult_TDA_KDE_5.50.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n1.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n1_NbFit0
## Naive Bayes
##
## 13387 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8925, 8924, 8925
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7465455 0.03200911
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7563873 0.087090012 Fold1
## 2 0.7423258 0.008937319 Fold2
## 3 0.7409234 0.000000000 Fold3
ad_tda_kde_5.50.5_n1_nb_fit_re<-Adult_TDA_KDE_5.50.5_n1_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.50.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2190
## >50K 0 162
##
## Accuracy : 0.7758
## 95% CI : (0.7674, 0.784)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 5.889e-05
##
## Kappa : 0.101
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06888
## Pos Pred Value : 0.77202
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98342
## Balanced Accuracy : 0.53444
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2190
## >50K 0 162
##
## Accuracy : 0.7758
## 95% CI : (0.7674, 0.784)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 5.889e-05
##
## Kappa : 0.101
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06888
## Pos Pred Value : 0.77202
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98342
## Balanced Accuracy : 0.53444
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.757985e-01 1.009798e-01 7.673951e-01 7.840372e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.888906e-05 0.000000e+00
ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_nb_cf0$overall[1]
# Print the per-class metrics. The field name was misspelled as `byClas1`, which
# does not exist on the confusion-matrix object and therefore printed NULL
# (the stale output below); re-knit to show the metrics table.
ad_tda_kde_5.50.5_n1_nb_cf0$byClass
## NULL
ad_tda_kde_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: baseline resamples minus the KDE-filter Node1
# resamples, so positive values mean the baseline scored higher in cross-validation.
# NOTE(review): ad_nb_fit_re is presumably the baseline naive Bayes resample accuracies -- confirm where it is defined.
diff_tda_kde_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n1_nb_fit_re)
diff_tda_kde_5.50.5_nb_n1_3_fold
## Accuracy
## 1 0.003944396
## 2 0.017842698
## 3 0.020461222
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n1_3_fold$probRight
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.2127
##
## $winRight
## [1] 0.7873
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_3_fold
## $left
## [1] 0.02770818
##
## $rope
## [1] 0.2530757
##
## $right
## [1] 0.7192162
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
#bf_tda_kde_5.50.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold)
## t = 2.7477, df = 2, p-value = 0.1109
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.007969344 0.036134888
## sample estimates:
## mean of x
## 0.01408277
### Test set diff
# Baseline must be the naive Bayes test accuracy (nb_cf_ov_acc), as in the
# PCA-filter naive Bayes sections. The original subtracted from svm_cf_ov_acc,
# which compared this naive Bayes model against the SVM baseline.
# The knitted output and the test-set Bayesian results printed below predate
# this fix; re-knit to refresh them.
diff_tda_kde_5.50.5_nb.n1_test <- (nb_cf_ov_acc - ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n1_test
## Accuracy
## 0.05200655
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n1_test_odds.left<-bst_tda_kde_5.50.5_nb.n1_test$probLeft/bst_tda_kde_5.50.5_nb.n1_test$probRight
bst_tda_kde_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1570667
##
## $winRight
## [1] 0.8429333
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n1_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n1_test))
#bf_tda_kde_5.50.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n1_test))
## Node 2
# Fit a naive Bayes classifier (method 'nb') on the KDE-filter Node2 training
# subset, resampled according to fitControl and tuned on accuracy.
# Fixed: the original passed tda.m_adult_5.50.5.n2.vec (the PCA-filter Node2 data)
# although this is the KDE-filter section; KDE Node1 uses the tda.m_kde_adult_* object.
# NOTE(review): assumes tda.m_kde_adult_5.50.5.n2.vec was built alongside the Node1
# object -- confirm. The knitted output below predates this fix; re-knit to refresh it.
Adult_TDA_KDE_5.50.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n2.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n2_NbFit0
## Naive Bayes
##
## 12206 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8138, 8138, 8136
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.642652 0.2948972
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.6701082 0.3065371 Fold1
## 2 0.7202557 0.4390306 Fold2
## 3 0.5375921 0.1391239 Fold3
ad_tda_kde_5.50.5_n2_nb_fit_re<-Adult_TDA_KDE_5.50.5_n2_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.50.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3598 1573
## >50K 3818 779
##
## Accuracy : 0.4481
## 95% CI : (0.4382, 0.458)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1385
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4852
## Specificity : 0.3312
## Pos Pred Value : 0.6958
## Neg Pred Value : 0.1695
## Prevalence : 0.7592
## Detection Rate : 0.3683
## Detection Prevalence : 0.5294
## Balanced Accuracy : 0.4082
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3598 1573
## >50K 3818 779
##
## Accuracy : 0.4481
## 95% CI : (0.4382, 0.458)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1385
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.4852
## Specificity : 0.3312
## Pos Pred Value : 0.6958
## Neg Pred Value : 0.1695
## Prevalence : 0.7592
## Detection Rate : 0.3683
## Detection Prevalence : 0.5294
## Balanced Accuracy : 0.4082
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.480958e-01 -1.384920e-01 4.381994e-01 4.580232e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.863576e-205
ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.4851672 0.3312075 0.6958035
## Neg Pred Value Precision Recall
## 0.1694583 0.6958035 0.4851672
## F1 Prevalence Detection Rate
## 0.5717010 0.7592138 0.3683456
## Detection Prevalence Balanced Accuracy
## 0.5293817 0.4081873
ad_tda_kde_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: baseline resamples minus the KDE-filter Node2
# resamples, so positive values mean the baseline scored higher in cross-validation.
# NOTE(review): ad_nb_fit_re is presumably the baseline naive Bayes resample accuracies -- confirm where it is defined.
diff_tda_kde_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n2_nb_fit_re)
diff_tda_kde_5.50.5_nb_n2_3_fold
## Accuracy
## 1 0.09022350
## 2 0.03991283
## 3 0.22379244
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n2_3_fold$probRight
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009333333
##
## $winRight
## [1] 0.9906667
# Bayesian Correlated Test
bct_tda_kde_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_3_fold
## $left
## [1] 0.09040089
##
## $rope
## [1] 0.02481347
##
## $right
## [1] 0.8847856
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
#bf_tda_kde_5.50.5_nb.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold)
## t = 2.1503, df = 2, p-value = 0.1645
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1180899 0.3540424
## sample estimates:
## mean of x
## 0.1179763
### Test set diff
# Single test-set accuracy difference: reference accuracy minus the node-2
# TDA NB test accuracy.
# NOTE(review): the reference is `svm_cf_ov_acc` (an SVM result, judging by
# its name), whereas the 3-fold comparison subtracts from `ad_nb_fit_re`;
# confirm that this is the intended baseline for an NB model.
diff_tda_kde_5.50.5_nb.n2_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n2_test
## Accuracy
## 0.3797093
## Bayesian Tests Test set diff
# NOTE(review): every test in this section is run on one observation only,
# so the results should be interpreted with care.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n2_test_odds.left<-bst_tda_kde_5.50.5_nb.n2_test$probLeft/bst_tda_kde_5.50.5_nb.n2_test$probRight
bst_tda_kde_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1608
##
## $winRight
## [1] 0.8392
# Bayesian Correlated Test
# Returns NA for all three probabilities here (see output), presumably because
# no variance can be estimated from a single difference.
bct_tda_kde_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nb.n2_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n2_test))
#bf_tda_kde_5.50.5_nb.n2_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n2_test))
# Node 3
# Fit a Naive Bayes ('nb') model on the node-3 data, tuned for accuracy under
# the resampling scheme in `fitControl`. The usekernel = FALSE candidate fails
# in every fold with a zero-variance error (see the warnings in the output),
# so only usekernel = TRUE receives a score.
Adult_TDA_KDE_5.50.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n3.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n3_NbFit0
## Naive Bayes
##
## 13240 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8827, 8826, 8827
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7725076 0.007081792
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7715840 0.00000000 Fold1
## 2 0.7714092 0.00000000 Fold2
## 3 0.7745298 0.02124538 Fold3
# Keep the per-fold accuracies as a one-column data frame, selected by name
# rather than by column position.
ad_tda_kde_5.50.5_n3_nb_fit_re <- Adult_TDA_KDE_5.50.5_n3_NbFit0$resample["Accuracy"]
# List the components of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test rows with the node-3 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels, then print.
ad_tda_kde_5.50.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2339
## >50K 0 13
##
## Accuracy : 0.7605
## 95% CI : (0.752, 0.769)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3844
##
## Kappa : 0.0084
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.005527
## Pos Pred Value : 0.760226
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.998669
## Balanced Accuracy : 0.502764
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2339
## >50K 0 13
##
## Accuracy : 0.7605
## 95% CI : (0.752, 0.769)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3844
##
## Kappa : 0.0084
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.005527
## Pos Pred Value : 0.760226
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.998669
## Balanced Accuracy : 0.502764
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.760544636 0.008368689 0.751953582 0.768980076 0.759213759
## AccuracyPValue McnemarPValue
## 0.384402497 0.000000000
# Keep the overall test-set accuracy, selected by name rather than by position
# so the extraction does not depend on the ordering of `$overall`.
ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_nb_cf0$overall["Accuracy"]
# Print the per-class metrics of the confusion matrix.
ad_tda_kde_5.50.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.000000000 0.005527211 0.760225525
## Neg Pred Value Precision Recall
## 1.000000000 0.760225525 1.000000000
## F1 Prevalence Detection Rate
## 0.863781958 0.759213759 0.759213759
## Detection Prevalence Balanced Accuracy
## 0.998669124 0.502763605
# Keep precision, recall and F1, selected by name instead of the positional 5:7.
ad_tda_kde_5.50.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` (presumably the non-TDA NB
# resample accuracies; defined earlier in the file) minus the node-3 TDA NB
# resample accuracies, so positive values favour the first operand.
diff_tda_kde_5.50.5_nb_n3_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n3_nb_fit_re
# Show the per-fold accuracy differences.
diff_tda_kde_5.50.5_nb_n3_3_fold
## Accuracy
## 1 -0.01125229
## 2 -0.01124067
## 3 -0.01314522
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold differences; -0.01 and 0.01 bound the ROPE.
bst_tda_kde_5.50.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (Inf when probRight is 0).
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nb.n3_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same ROPE bounds.
bsr_tda_kde_5.50.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.8339333
##
## $winRope
## [1] 0.1660667
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is presumably the correlation argument
# (confirm against the function definition), followed by the ROPE bounds.
bct_tda_kde_5.50.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n3_3_fold
## $left
## [1] 0.9381183
##
## $rope
## [1] 0.06132473
##
## $right
## [1] 0.000556949
# Plot the ROPE analysis of the fold differences.
plot(rope(diff_tda_kde_5.50.5_nb_n3_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
# bf_tda_kde_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
# bf_tda_kde_5.50.5_nb.n3_3_fold
# Classical one-sample t-test of the fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold)
## t = -18.769, df = 2, p-value = 0.002827
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01460265 -0.00915614
## sample estimates:
## mean of x
## -0.01187939
### Test set diff
# Single test-set accuracy difference: reference accuracy minus the node-3
# TDA NB test accuracy.
# NOTE(review): the reference is `svm_cf_ov_acc` (an SVM result, judging by
# its name), whereas the 3-fold comparison subtracts from `ad_nb_fit_re`;
# confirm that this is the intended baseline for an NB model.
diff_tda_kde_5.50.5_nb.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n3_test
## Accuracy
## 0.06726044
## Bayesian Tests Test set diff
# NOTE(review): every test in this section is run on one observation only,
# so the results should be interpreted with care.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n3_test_odds.left<-bst_tda_kde_5.50.5_nb.n3_test$probLeft/bst_tda_kde_5.50.5_nb.n3_test$probRight
bst_tda_kde_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1606333
##
## $winRight
## [1] 0.8393667
# Bayesian Correlated Test
# Returns NA for all three probabilities here (see output), presumably because
# no variance can be estimated from a single difference.
bct_tda_kde_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nb.n3_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n3_test))
#bf_tda_kde_5.50.5_nb.n3_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n3_test))
# Node 4
# Fit a Naive Bayes ('nb') model on the node-4 data, tuned for accuracy under
# the resampling scheme in `fitControl`. The usekernel = FALSE candidate fails
# in every fold with a zero-variance error (see the warnings in the output),
# so only usekernel = TRUE receives a score.
Adult_TDA_KDE_5.50.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n4.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Jamaica, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n4_NbFit0
## Naive Bayes
##
## 16700 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11133, 11134, 11133
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9449102 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9448536 0 Fold1
## 2 0.9450234 0 Fold2
## 3 0.9448536 0 Fold3
# Keep the per-fold accuracies as a one-column data frame, selected by name
# rather than by column position.
ad_tda_kde_5.50.5_n4_nb_fit_re <- Adult_TDA_KDE_5.50.5_n4_NbFit0$resample["Accuracy"]
# List the components of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test rows with the node-4 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels, then print.
ad_tda_kde_5.50.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall test-set accuracy, selected by name rather than by position
# so the extraction does not depend on the ordering of `$overall`.
ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_nb_cf0$overall["Accuracy"]
# Print the per-class metrics of the confusion matrix.
ad_tda_kde_5.50.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1, selected by name instead of the positional 5:7.
ad_tda_kde_5.50.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` (presumably the non-TDA NB
# resample accuracies; defined earlier in the file) minus the node-4 TDA NB
# resample accuracies, so positive values favour the first operand.
diff_tda_kde_5.50.5_nb_n4_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n4_nb_fit_re
# Show the per-fold accuracy differences.
diff_tda_kde_5.50.5_nb_n4_3_fold
## Accuracy
## 1 -0.1845219
## 2 -0.1848549
## 3 -0.1834690
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold differences; -0.01 and 0.01 bound the ROPE.
bst_tda_kde_5.50.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (Inf when probRight is 0).
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nb.n4_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same ROPE bounds.
bsr_tda_kde_5.50.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9904667
##
## $winRope
## [1] 0.009533333
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is presumably the correlation argument
# (confirm against the function definition), followed by the ROPE bounds.
bct_tda_kde_5.50.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9999962
##
## $rope
## [1] 7.47709e-07
##
## $right
## [1] 3.081056e-06
# Plot the ROPE analysis of the fold differences.
plot(rope(diff_tda_kde_5.50.5_nb_n4_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
# bf_tda_kde_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
# bf_tda_kde_5.50.5_nb.n4_3_fold
# Classical one-sample t-test of the fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold)
## t = -441.22, df = 2, p-value = 5.137e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1860790 -0.1824849
## sample estimates:
## mean of x
## -0.1842819
### Test set diff
# Single test-set accuracy difference: reference accuracy minus the node-4
# TDA NB test accuracy.
# NOTE(review): the reference is `svm_cf_ov_acc` (an SVM result, judging by
# its name), whereas the 3-fold comparison subtracts from `ad_nb_fit_re`;
# confirm that this is the intended baseline for an NB model.
diff_tda_kde_5.50.5_nb.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n4_test
## Accuracy
## 0.06859132
## Bayesian Tests Test set diff
# NOTE(review): every test in this section is run on one observation only,
# so the results should be interpreted with care.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n4_test_odds.left<-bst_tda_kde_5.50.5_nb.n4_test$probLeft/bst_tda_kde_5.50.5_nb.n4_test$probRight
bst_tda_kde_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1557333
##
## $winRight
## [1] 0.8442667
# Bayesian Correlated Test
# Returns NA for all three probabilities here (see output), presumably because
# no variance can be estimated from a single difference.
bct_tda_kde_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nb.n4_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n4_test))
#bf_tda_kde_5.50.5_nb.n4_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n4_test))
# Node 5
# Fit a Naive Bayes ('nb') model on the node-5 data, tuned for accuracy under
# the resampling scheme in `fitControl`. The usekernel = FALSE candidate fails
# in every fold with a zero-variance error (see the warnings in the output),
# so only usekernel = TRUE receives a score.
Adult_TDA_KDE_5.50.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.50.5.n5.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.50.5_n5_NbFit0
## Naive Bayes
##
## 14404 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9603, 9602, 9603
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9979867 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.50.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9981254 0 Fold1
## 2 0.9979175 0 Fold2
## 3 0.9979171 0 Fold3
# Keep the per-fold accuracies as a one-column data frame, selected by name
# rather than by column position.
ad_tda_kde_5.50.5_n5_nb_fit_re <- Adult_TDA_KDE_5.50.5_n5_NbFit0$resample["Accuracy"]
# List the components of the fitted model object.
summary(Adult_TDA_KDE_5.50.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test rows with the node-5 Naive Bayes model.
pred0 <- predict(
  Adult_TDA_KDE_5.50.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels, then print.
ad_tda_kde_5.50.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.50.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall test-set accuracy, selected by name rather than by position
# so the extraction does not depend on the ordering of `$overall`.
ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc <- ad_tda_kde_5.50.5_n5_nb_cf0$overall["Accuracy"]
# Print the per-class metrics of the confusion matrix.
ad_tda_kde_5.50.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1, selected by name instead of the positional 5:7.
ad_tda_kde_5.50.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.50.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nb_fit_re` (presumably the non-TDA NB
# resample accuracies; defined earlier in the file) minus the node-5 TDA NB
# resample accuracies, so positive values favour the first operand.
diff_tda_kde_5.50.5_nb_n5_3_fold <- ad_nb_fit_re - ad_tda_kde_5.50.5_n5_nb_fit_re
# Show the per-fold accuracy differences.
diff_tda_kde_5.50.5_nb_n5_3_fold
## Accuracy
## 1 -0.2377937
## 2 -0.2377490
## 3 -0.2365325
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold differences; -0.01 and 0.01 bound the ROPE.
bst_tda_kde_5.50.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test (Inf when probRight is 0).
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left <-
  bst_tda_kde_5.50.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.50.5_nb.n5_3_fold[["probRight"]]
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same ROPE bounds.
bsr_tda_kde_5.50.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9914
##
## $winRope
## [1] 0.0086
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is presumably the correlation argument
# (confirm against the function definition), followed by the ROPE bounds.
bct_tda_kde_5.50.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9999978
##
## $rope
## [1] 3.415999e-07
##
## $right
## [1] 1.859851e-06
# Plot the ROPE analysis of the fold differences.
plot(rope(diff_tda_kde_5.50.5_nb_n5_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
# bf_tda_kde_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
# bf_tda_kde_5.50.5_nb.n5_3_fold
# Classical one-sample t-test of the fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold)
## t = -574.5, df = 2, p-value = 3.03e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2391361 -0.2355808
## sample estimates:
## mean of x
## -0.2373584
### Test set diff
# Single test-set accuracy difference: reference accuracy minus the node-5
# TDA NB test accuracy.
# NOTE(review): the reference is `svm_cf_ov_acc` (an SVM result, judging by
# its name), whereas the 3-fold comparison subtracts from `ad_nb_fit_re`;
# confirm that this is the intended baseline for an NB model.
diff_tda_kde_5.50.5_nb.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n5_test
## Accuracy
## 0.06859132
## Bayesian Tests Test set diff
# NOTE(review): every test in this section is run on one observation only,
# so the results should be interpreted with care.
# Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.50.5_nb.n5_test_odds.left<-bst_tda_kde_5.50.5_nb.n5_test$probLeft/bst_tda_kde_5.50.5_nb.n5_test$probRight
bst_tda_kde_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1628
##
## $winRight
## [1] 0.8372
# Bayesian Correlated Test
# Returns NA for all three probabilities here (see output), presumably because
# no variance can be estimated from a single difference.
bct_tda_kde_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.50.5_nb.n5_test))
# BayesFactor (disabled)
#bf_tda_kde_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n5_test))
#bf_tda_kde_5.50.5_nb.n5_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n5_test))